forked from jul095/TrafficMonitoring
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReadCOCODataset.py
503 lines (426 loc) · 22.2 KB
/
ReadCOCODataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
# ****************************************************************************
# @ReadCOCODataset.py
#
# Read COCO Datasets with filtering methods and different category matching
#
#
# @copyright (c) 2021 Elektronische Fahrwerksysteme GmbH. All rights reserved.
# Dr.-Ludwig-Kraus-Straße 6, 85080 Gaimersheim, DE, https://www.efs-auto.com
# ****************************************************************************
import os
from operator import itemgetter
import matplotlib.pyplot as plt
import numpy as np
import pycocotools.mask as mask_util
import pycocotools.mask as rletools
from detectron2.data import MetadataCatalog
from detectron2.structures import BoxMode
from imantics import Mask
from pycocotools.coco import COCO
class ReadCOCODatasets:
    """
    Collection of all COCOFile datasets that belong to one data split.

    Every sub-folder of ``<this file>/../../data/dataset/<data_type>`` is loaded as one COCOFile.
    The class merges them into one detectron2 dataset list and offers statistic plots.
    """

    def __init__(self, data_type='training', vis=False, is_coco_eval=False):
        """
        Search datasets in the dataset folder and load them into a list

        :param data_type: name of the sub-folder below data/dataset that is searched for datasets
        :param vis: if True, the statistics of the merged categories are plotted after loading
        :param is_coco_eval: forwarded as ``coco_category_ids`` to COCOFile.get_detectron2_dataset
        """
        self.data_type = data_type
        self._coco_files = []
        self.is_coco_eval = is_coco_eval
        # Stays empty when no dataset folder exists; otherwise the categories of the last dataset.
        self.categories = []
        dataset_path = os.path.join(os.path.dirname(__file__), '../..', 'data/dataset',
                                    data_type)
        # Sorted, so the order of the datasets does not depend on the file system.
        folder_of_datasets = sorted(os.listdir(dataset_path))
        print(dataset_path)
        for folder_dataset in folder_of_datasets:
            folder_path = os.path.join(dataset_path, folder_dataset)
            if not os.path.isdir(folder_path):
                # A plain file next to the dataset folders is not a dataset.
                continue
            print(folder_dataset)
            coco_file = COCOFile(folder_path)
            self._coco_files.append(coco_file)
            self.categories = coco_file.categories
        if vis:
            # self.plot_statistic_with_raw_categories()
            self.plot_statistics_with_merged_categories()

    def get_unique_count_of_categories(self):
        """
        Get some statistics about the track count of the dataset

        Every track id is counted once per dataset. Annotations with track id -1 and annotations
        whose category has no merged category id are not counted.

        :return: dict {merged category id: number of tracks}
        """
        image_statistic = {}
        for coco_file in self._coco_files:
            # -1 is pre-registered, so annotations with this track id are never counted.
            processed_track_id = {-1}
            for annotation in coco_file.coco.anns.values():
                track_id = annotation["attributes"]["track_id"]
                if track_id in processed_track_id:
                    continue
                processed_track_id.add(track_id)
                _, new_category_id = coco_file.get_mapped_category_by_old_id(annotation["category_id"])
                if new_category_id is not None:
                    image_statistic[new_category_id] = image_statistic.get(new_category_id, 0) + 1
        return image_statistic

    def get_detectron2_dataset(self):
        """
        get the dataset prepared in the necessary detectron2 format

        :return: one list with the records of all loaded datasets
        """
        dataset_dicts = []
        for coco_file in self._coco_files:
            # NOTE(review): there used to be a separate branch for data_type == "training" that was
            # commented as "only relevant images", but both branches requested all images. The
            # behaviour (all images for every split) is kept here.
            dataset_dicts.extend(
                coco_file.get_detectron2_dataset(True, coco_category_ids=self.is_coco_eval))
        return dataset_dicts

    def get_dataset_seperated(self):
        """
        get one detectron2 dataset list per loaded dataset instead of one merged list
        """
        return [coco_file.get_detectron2_dataset(True) for coco_file in self._coco_files]

    def get_count_label_in_barchart(self, axes, data):
        """
        write the count of every bar as text into the barchart

        :param axes: matplotlib axes of the barchart
        :param data: counts in the order of the bars
        """
        for idx, data_category_label_count in enumerate(data):
            if data_category_label_count == 0:
                # fixed height, the formula below would divide by zero
                y_position = 20
            else:
                y_position = 100 / data_category_label_count
            axes.text(x=idx - 0.5, y=y_position, s=f"{data_category_label_count}",
                      fontdict=dict(fontsize=15), va='center')

    def autolabel(self, rects, axes):
        """
        attach the bar height as text label to every bar

        :param rects: the bars returned by ``axes.bar``
        :param axes: matplotlib axes the bars belong to
        """
        for rect in rects:
            height = rect.get_height()
            axes.text(rect.get_x() + rect.get_width() / 2., 0.99 * height,
                      '%d' % int(height),
                      ha='center', va='bottom', fontsize=14)

    def _plot_count_bar_chart(self, labels, counts, color, legend_label, title):
        """
        draw one labelled barchart into a new figure and return the figure
        """
        fig, axes = plt.subplots(figsize=(15, 5))
        rects = axes.bar(labels, counts, width=0.5, color=color, label=legend_label)
        # The new axes are the current axes here, so this rotates their x tick labels.
        plt.xticks(rotation=60)
        axes.set_title(title, fontsize=12)
        axes.tick_params(axis='both', which='major', labelsize=14)
        axes.legend(fontsize=14)
        self.autolabel(rects, axes)
        fig.tight_layout()
        return fig

    def plot_statistics_with_merged_categories(self):
        """
        Barcharts for all COCOfile objects

        The first figure shows the number of segmentations per merged category, the second one
        the number of unique tracks per merged category.
        """
        if not self._coco_files:
            print("No datasets loaded for", self.data_type)
            return
        dataset_dicts = self.get_detectron2_dataset()
        unique_image_statistics = self.get_unique_count_of_categories()
        print(unique_image_statistics)
        merged_categories = self._coco_files[0].merge_categories()
        print(merged_categories)
        category_names = {category_id: category_name
                          for category_name, category_id in merged_categories.items()}

        # Start with every merged category at 0, so categories without labels get a bar as well.
        amount_per_category = dict.fromkeys(merged_categories.values(), 0)
        for record in dataset_dicts:
            for annotation in record["annotations"]:
                current_category = annotation["category_id"]
                # .get(): with is_coco_eval the ids are not the merged ids
                amount_per_category[current_category] = amount_per_category.get(current_category, 0) + 1
        amount_per_category = sorted(amount_per_category.items(), key=itemgetter(1), reverse=True)
        print("merged dataset: ", amount_per_category)
        sum_all_labels = sum(count for _, count in amount_per_category)
        print("raw categories count all", sum_all_labels)
        print("Count Images: ", len(dataset_dicts))

        self._plot_count_bar_chart(
            [category_names.get(category_id, str(category_id)) for category_id, _ in amount_per_category],
            [count for _, count in amount_per_category],
            'gray', "number of segmentations",
            f"Segmentations for {self.data_type} (Total sum: {sum_all_labels},Sum of Images: {len(dataset_dicts)})")

        unique_image_statistics = sorted(unique_image_statistics.items(), key=itemgetter(1), reverse=True)
        # Generator sum also works when no track was counted at all.
        sum_all_tracks = sum(count for _, count in unique_image_statistics)
        self._plot_count_bar_chart(
            [category_names.get(category_id, str(category_id)) for category_id, _ in unique_image_statistics],
            [count for _, count in unique_image_statistics],
            'darkblue', "number of tracks",
            f"Unique Tracks for {self.data_type} (Total sum: {sum_all_tracks}, Sum of Images: {len(dataset_dicts)})")

        plt.show()

    def plot_statistic_with_raw_categories(self):
        """
        Barchart with the label count of the raw (not merged) categories over all datasets
        """
        if not self._coco_files:
            print("No datasets loaded for", self.data_type)
            return
        print(self._coco_files[0].categories)
        amount_per_category = dict((category["name"], 0) for category in self._coco_files[0].categories)
        for coco_file in self._coco_files:
            # The counts are computed here; COCOFile never stores them in an attribute.
            for category, label_count in coco_file.get_count_of_categories():
                category_name = category["name"]
                amount_per_category[category_name] = amount_per_category.get(category_name, 0) + label_count
        plt.figure(figsize=(15, 12))
        amount_per_category = sorted(amount_per_category.items(), key=itemgetter(1),
                                     reverse=True)
        print("raw categories without any filtering", amount_per_category)
        sum_all_labels = sum(count for _, count in amount_per_category)
        print("raw categories count all", sum_all_labels)
        plt.bar([category for category, _ in amount_per_category],
                [count for _, count in amount_per_category])
        plt.show()
class COCOFile:
    """
    Main Input Logic for the labeled dataset
    with mapping and cleanup functionality

    One instance wraps one COCO annotation file (``<dataset_path>/annotations/<name>.json``) and
    translates its raw categories into the merged training categories.
    """
    # central mapping: raw labeled categories -> final training categories
    category_mapping = {
        "car": "car",
        "cyclist": "bicycle",
        "car trailer": "car trailer",
        "truck": "truck",
        "truck trailer": "truck trailer",
        "car-transporter": "truck",
        "motorcycle": "motorcycle",
        "bus": "bus",
        "police car": "transporter",
        "firefighter truck": "truck",
        "ambulance": "ambulance",
        "pedestrian": "person",
        "pedestrian with stroller": "pedestrian with stroller",
        "pedestrian in wheelchair": "pedestrian in wheelchair",
        "scooter": "scooter",
        "transporter": "transporter"
    }
    # These categories will be ignored in the training and not served for detectron2
    ignore_categories = ['ambulance', 'bus', 'car trailer', 'truck trailer',
                         'motorcycle', 'pedestrian with stroller', 'pedestrian in wheelchair']

    def __init__(self, dataset_path, instance_file_name="instances_default"):
        """
        Load the annotation file of one dataset folder

        :param dataset_path: dataset folder, the images are expected in its "images" sub-folder
        :param instance_file_name: name of the json file in the "annotations" sub-folder (without extension)
        """
        annotation_file = '{}/annotations/{}.json'.format(
            dataset_path, instance_file_name)
        self.dataset_path = dataset_path
        self.coco = COCO(annotation_file)
        self.categories = self.get_categories()

    def get_unique_count_of_categories(self):
        """
        Count every track id once and group the counts by the mapped category name

        Annotations with track id -1 are not counted.

        :return: dict {new category name: number of tracks}; the key is None for raw categories
                 without an entry in category_mapping
        """
        image_statistic = {}
        # -1 is pre-registered, so annotations with this track id are never counted.
        processed_track_id = {-1}
        for annotation in self.coco.anns.values():
            track_id = annotation["attributes"]["track_id"]
            if track_id in processed_track_id:
                continue
            processed_track_id.add(track_id)
            new_category, _ = self.get_mapped_category_by_old_id(annotation["category_id"])
            image_statistic[new_category] = image_statistic.get(new_category, 0) + 1
        return image_statistic

    def merge_categories(self):
        """
        use the attribute category_mapping for returning a translation map from
        new category names to new category ids

        The ids are the positions of the alphabetically sorted new category names
        without the ignored categories.
        """
        new_category_names = sorted(set(self.category_mapping.values()))
        new_category_names = [category_name for category_name in new_category_names
                              if category_name not in self.ignore_categories]
        return {category_name: index for index, category_name in enumerate(new_category_names)}

    def get_detectron2_metadata(self):
        """
        provide metadata for Metadata Catalog in detectron2

        :return: sorted list of the new category names (thing_classes for detectron2)
        """
        return sorted(self.merge_categories())

    def get_category_dict_by_id(self, category_id):
        """
        return the raw category dict with the given id or None if there is no such category
        """
        return next(
            (category for category in self.categories
             if category["id"] == category_id), None)

    def get_category_name_by_new_id(self, category_id):
        """
        return the new category name for a new category id

        :raises ValueError: if no merged category has this id
        """
        for category_name, new_id in self.merge_categories().items():
            if new_id == category_id:
                return category_name
        raise ValueError(f"{category_id} is not a merged category id")

    def get_mapped_category_by_old_id(self, category_id):
        """
        translate a raw category id into the new category

        :return: tuple (new category name, new category id). The id is None for ignored
                 categories; both are None if the raw category is unknown or not mapped.
        """
        old_category = self.get_category_dict_by_id(category_id)
        if old_category is None:
            return None, None
        new_category_name = self.category_mapping.get(old_category["name"])
        return new_category_name, self.merge_categories().get(new_category_name)

    def get_old_category_ids_by_new_category(self, new_category_names):
        """
        return the ids of all raw categories which are mapped to one of the given new category names
        """
        old_category_names = [key for key, value in self.category_mapping.items() if value in new_category_names]
        return [category["id"] for category in self.categories if category["name"] in old_category_names]

    def get_new_category_id_by_coco_origin_cat(self, coco_id):
        """
        return the new category id for a category whose name is already a new category name
        (used for the original coco dataset), None if there is no such new category
        """
        old_category = self.get_category_dict_by_id(coco_id)
        if old_category is None:
            return None
        return self.merge_categories().get(old_category["name"])

    def get_relevant_images(self, share=1.0):
        """
        return only relevant image ids for optimize trainings

        :param share: share (0.0 - 1.0) of the images per relevant category which is used,
                      starting with the images with the most labels of this category
        :return: set of image ids
        """
        # These are new category names, so cyclists have to be requested as "bicycle".
        relevant_category_ids = self.get_old_category_ids_by_new_category(
            ["bicycle", "person", "scooter", 'truck', 'bus', 'transporter'])
        filtered_images_ids = set()
        for category_id in relevant_category_ids:
            relevant_images = self.get_category_count_per_image(category_id)
            filtered_images_ids.update(
                image_id for image_id, _ in relevant_images[:int(len(relevant_images) * share)])
        return filtered_images_ids

    def get_detectron2_dataset_original_coco(self, max_images=2000):
        """
        use this Method if you want to read in the original coco 2017 (instances_train2017) dataset

        Only images with at least one "person" annotation are used and only annotations
        whose category name is also a new category name are kept.

        :param max_images: maximum number of returned records, None for no limit
        :return: list of records in the detectron2 format
        """
        dataset_dicts = []
        cat_ids = set(self.coco.getCatIds(catNms=["person"]))
        img_ids = sorted(self.coco.imgs.keys())
        imgs = self.coco.loadImgs(img_ids)
        anns = [self.coco.imgToAnns[img_id] for img_id in img_ids]
        ann_keys = ["bbox", "category_id"]
        num_instances_without_valid_segmentation = 0
        for img_dict, anno_dict in zip(imgs, anns):
            if not cat_ids.intersection(ann["category_id"] for ann in anno_dict):
                continue
            image_id = img_dict["id"]
            record = {
                "file_name": os.path.join(self.dataset_path, "images", img_dict["file_name"]),
                "height": img_dict["height"],
                "width": img_dict["width"],
                "image_id": image_id,
            }
            objs = []
            for anno in anno_dict:
                assert anno["image_id"] == image_id
                assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'
                obj = {key: anno[key] for key in ann_keys if key in anno}
                category_id = self.get_new_category_id_by_coco_origin_cat(obj["category_id"])
                if category_id is None:
                    continue
                segm = anno.get("segmentation", None)
                if segm:  # either list[list[float]] or dict(RLE)
                    if isinstance(segm, dict):
                        if isinstance(segm["counts"], list):
                            # convert to compressed RLE
                            segm = mask_util.frPyObjects(segm, *segm["size"])
                        # RLE masks are converted into one polygon
                        bitmask = rletools.decode(segm)
                        polygon_list = Mask(bitmask).polygons().points
                        if len(polygon_list) == 0:
                            # an empty mask has no polygon at all
                            num_instances_without_valid_segmentation += 1
                            continue
                        polygon_points = polygon_list[0].reshape((-1))
                        if len(polygon_points) < 6 or len(polygon_points) % 2 != 0:
                            num_instances_without_valid_segmentation += 1
                            continue
                        segm = [polygon_points]
                    else:
                        # filter out invalid polygons (< 3 points)
                        segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
                        if len(segm) == 0:
                            num_instances_without_valid_segmentation += 1
                            continue  # ignore this instance
                    obj["segmentation"] = segm
                obj["bbox_mode"] = BoxMode.XYWH_ABS
                obj["category_id"] = category_id
                objs.append(obj)
            record["annotations"] = objs
            dataset_dicts.append(record)
        if num_instances_without_valid_segmentation > 0:
            print("Instances skipped without valid segmentation: ", num_instances_without_valid_segmentation)
        return dataset_dicts[:max_images]

    def get_detectron2_dataset(self, all_images=True, coco_category_ids=False):
        """
        returns the own labeled dataset filtered by ignored cats and with category mapping in
        the detectron2 format

        :param all_images: if False, only the images from get_relevant_images() are used
        :param coco_category_ids: if True, the category ids are the positions of the new category
                                  names in the thing_classes of 'coco_2017_train'; annotations
                                  whose category is not part of these classes are skipped
        :return: list of records (file_name, image_id, height, width, annotations)
        """
        dataset_dicts = []
        # The relevant images are only needed (and only computed) for the filtered dataset.
        relevant_image_ids = None if all_images else self.get_relevant_images()
        coco_thing_classes = None
        if coco_category_ids:
            coco_thing_classes = MetadataCatalog.get('coco_2017_train').thing_classes
        for image_id in self.coco.imgs:
            if relevant_image_ids is not None and image_id not in relevant_image_ids:
                continue
            image = self.coco.loadImgs(image_id)[0]
            record = {
                "file_name": os.path.join(self.dataset_path, "images", image["file_name"]),
                "image_id": image_id,  # Take care if this number is unique!!!
                "height": image["height"],
                "width": image["width"],
            }
            objs = []
            for ann in self.coco.imgToAnns[image_id]:
                assert ann["image_id"] == image_id
                new_category_name, new_category_id = self.get_mapped_category_by_old_id(ann["category_id"])
                if new_category_name in self.ignore_categories:
                    continue
                if coco_thing_classes is not None:
                    try:
                        new_category_id = coco_thing_classes.index(new_category_name)
                    except ValueError:
                        # Skip only this annotation; the other annotations of the image stay valid.
                        continue
                if new_category_id is None:
                    # raw category without a merged category, there is no id to train on
                    continue
                objs.append({
                    "segmentation": ann.get("segmentation", None),
                    "bbox": ann["bbox"],
                    "bbox_mode": BoxMode.XYWH_ABS,
                    "category_id": new_category_id,
                    # annotations without attributes get the track id None
                    "track_id": (ann.get("attributes") or {}).get("track_id"),
                })
            record["annotations"] = objs
            dataset_dicts.append(record)
        return dataset_dicts

    def get_category_count_per_image(self, category_id_to_filter):
        """
        count the annotations of one raw category per image

        :param category_id_to_filter: raw category id
        :return: list of tuples (image id, count), sorted descending by the count
        """
        image_statistic = {}
        for annotation in self.coco.anns.values():
            if annotation["category_id"] == category_id_to_filter:
                image_id = annotation["image_id"]
                image_statistic[image_id] = image_statistic.get(image_id, 0) + 1
        return sorted(image_statistic.items(),
                      key=itemgetter(1),
                      reverse=True)

    def get_categories(self):
        """
        return the raw category dicts of the annotation file
        """
        return self.coco.loadCats(self.coco.getCatIds())

    def get_count_of_categories(self):
        """
        return the number of ``catToImgs`` entries per raw category

        :return: list of tuples (raw category dict, count); categories without a category dict
                 are left out
        """
        result_list = []
        for existing_label, entries in self.coco.catToImgs.items():
            # Look the category up by its id, the ids do not have to be the list positions + 1.
            category = self.get_category_dict_by_id(existing_label)
            if category is not None:
                result_list.append((category, len(entries)))
        return result_list

    def plot_statistic(self):
        """
        Show the statistic figure of this dataset

        NOTE(review): the labels and counts are collected, but nothing is drawn into the two
        axes yet; only the title of the first one is set.
        """
        labels = []
        amount_per_category = []
        # The counts are computed here; there is no attribute which stores them.
        for category, label_count in self.get_count_of_categories():
            labels.append(category["name"])
            amount_per_category.append(label_count)
        fig, ax = plt.subplots(2, figsize=(6, 6), subplot_kw=dict(aspect="equal"))

        def func(pct, allvals):
            # formats a percentage together with its absolute value
            absolute = int(pct / 100. * np.sum(allvals))
            return "{:.1f}%\n{:d}".format(pct, absolute)

        ax[0].set_title("All labels including duplications")
        plt.show()
if __name__ == '__main__':
    # For the original COCO 2017 data, create
    # COCOFile(<path to data/train2017/>, "instances_train2017") instead and call
    # get_detectron2_dataset_original_coco() on it.
    # Load every data split one after another and show its statistic plots.
    for split_name in ('training', 'validation', 'test'):
        cocoFile = ReadCOCODatasets(split_name, vis=True)