-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdivide_data.py
54 lines (48 loc) · 1.56 KB
/
divide_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
Divide the images directory into tomato and non_tomato subdirectories.
"""
import json
from pprint import pprint
from shutil import copyfile

# Box "id" value that marks an annotation as a tomato in the JSON databases.
TOMATO_ID = "939030726152341c154ba28629341da6_train"
# Parse the train and test annotation databases (JSON stored in .txt files).
# Each file is read in full and decoded in one step.
with open('train_database.txt') as train_data:
    train_json = json.loads(train_data.read())

with open('test_database.txt') as test_data:
    test_json = json.loads(test_data.read())
def _contains_tomato(boxes):
    """Return True if any annotation box in *boxes* has the tomato id.

    Every box is inspected. The previous code indexed ``boxes[0]`` on each
    iteration, so a tomato box in any position but the first was missed.
    An empty list of boxes yields False.
    """
    return any(box['id'] == TOMATO_ID for box in boxes)


def _divide_split(records, src_dir, dst_root):
    """Copy each image of one data split into its class folder.

    Args:
        records: Sequence of image records; each record is read for its
            'name' (image file name) and 'boxes' (list of dicts with an
            'id' key).
        src_dir: Directory prefix (with trailing slash) holding the images.
        dst_root: Directory prefix (with trailing slash) that contains the
            ``tomato/`` and ``non_tomato/`` destination folders.

    The destination folders must already exist: ``copyfile`` does not
    create missing directories.
    """
    for position, record in enumerate(records, start=1):
        print("Processing file " + str(position))
        file_name = record['name']
        label = "tomato" if _contains_tomato(record['boxes']) else "non_tomato"
        copyfile(src_dir + file_name, dst_root + label + "/" + file_name)


# process train data
_divide_split(train_json, "./Images/Train/", "./problem2/Train/")

# process test data
_divide_split(test_json, "./Images/Test/", "./problem2/Test/")