-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathto_class_specific.py
156 lines (137 loc) · 4.44 KB
/
to_class_specific.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
from PIL import Image
import numpy as np
import pathlib
import random
import shutil
"""
This script converts a metaclass dataset into an ImageNet-class dataset.
1. Fetch the mapping from each metaclass to its set of ImageNet folder names (used when reading the masks to be relabeled).
2. Fetch the mapping from ImageNet folder names to their number of parts (used to get the starting part index of each class).
3. Create the ImageNet-class dataset directories.
4. Compute the metaclass mask starting indices and the ImageNet-class mask starting indices (used during mask relabeling).
5. Relabel each mask: for each non-background pixel, subtract the metaclass starting index and add the ImageNet-class starting index.
6. Write the .txt file lists for the new dataset.
"""
# Source dataset root; the script walks this tree looking for .png masks.
metaclass_dataset_dir = "/data/kornrapatp/PartImageNet/PartSegmentations/All-imagenetclass-segtrain"
# Destination root; the script creates this tree and writes into it.
imagenetclass_dataset_dir = (
    "/data/kornrapatp/PartImageNet/PartSegmentations/All-imagenetclass-segtrain-processed"
)

# Number of parts of each metaclass.
CLASSES = dict(
    Quadruped=4,
    Biped=5,
    Fish=4,
    Bird=5,
    Snake=2,
    Reptile=4,
    Car=3,
    Bicycle=4,
    Boat=2,
    Aeroplane=5,
    Bottle=2,
)

# Metaclass name -> set of ImageNet class ids; every set starts out empty and
# each metaclass gets its own set object.
metaclass_to_class = {
    metaclass_name: set()
    for metaclass_name in (
        "Aeroplane",
        "Quadruped",
        "Biped",
        "Fish",
        "Bird",
        "Snake",
        "Reptile",
        "Car",
        "Bicycle",
        "Boat",
        "Bottle",
    )
}
# Step 1
# For every directory under the source dataset, record which ImageNet class
# ids (the filename prefix before the first "_") have masks stored in it.
# The directory name is used as the metaclass key, so a .png found in a
# directory that is not a known metaclass raises KeyError.
for path, subdirs, files in os.walk(metaclass_dataset_dir):
    # The directory name is the same for every file in it; compute it once and
    # let os.path handle the separator instead of splitting on "/".
    metaclass = os.path.basename(path)
    for name in files:
        # Test the extension itself: a substring test would also accept names
        # such as "mask.png.bak".
        if not name.endswith(".png"):
            continue
        imagenet_class = name.split("_")[0]
        metaclass_to_class[metaclass].add(imagenet_class)

# Step 2
# Count the total number of labels and record the part count of each ImageNet
# class. The count starts at 1 (presumably for the background label 0 --
# confirm against the consumer of this number).
numpart = 1
imagenet_classes_part_num = {}
for k, v in metaclass_to_class.items():
    numpart += CLASSES[k] * len(v)
    for imagenet_class in v:
        imagenet_classes_part_num[imagenet_class] = CLASSES[k]
# Sort by ImageNet class id so that later index assignment is deterministic.
imagenet_classes_part_num = dict(sorted(imagenet_classes_part_num.items()))
print(f"Total part in new dataset: {numpart}")
# Step 3
# Create <root>/<partition>/<imagenet class>/ for every partition and class,
# plus an empty <root>/<partition>/<imagenet class>.txt next to each directory.
# makedirs(exist_ok=True) creates missing parent directories and does not fail
# when the script is re-run against an existing output tree.
os.makedirs(imagenetclass_dataset_dir, exist_ok=True)
for partition in ["train", "val", "test"]:
    partition_dir = os.path.join(imagenetclass_dataset_dir, partition)
    os.makedirs(partition_dir, exist_ok=True)
    for c in imagenet_classes_part_num:
        os.makedirs(os.path.join(partition_dir, c), exist_ok=True)
        # Opening in "w" mode creates the file, or truncates an existing one,
        # leaving it empty.
        with open(os.path.join(partition_dir, f"{c}.txt"), "w"):
            pass
# Step 4
# Assign consecutive label ranges, starting at 1, to the metaclasses (in
# sorted name order) and to the ImageNet classes (in the order of
# imagenet_classes_part_num). Each class owns as many labels as it has parts.
classes = sorted(CLASSES.keys())
print(classes)

class_starting_index = {}
curid = 1
for metaclass_name in classes:
    class_starting_index[metaclass_name] = curid
    curid = curid + CLASSES[metaclass_name]
print(class_starting_index)

imagenet_class_starting_index = {}
imagenet_indices = {}
curid = 1
for folder_name, part_count in imagenet_classes_part_num.items():
    first_index = curid
    curid = first_index + part_count
    imagenet_class_starting_index[folder_name] = first_index
    # All labels owned by this ImageNet class: [first_index, curid).
    imagenet_indices[folder_name] = list(range(first_index, curid))
# Step 5
def save_pil_image(img, path):
    """Convert the array ``img`` to a PIL image and save it at ``path``.

    Args:
        img: Array accepted by ``Image.fromarray``.
        path: Destination file path; the format is chosen by ``Image.save``.
    """
    destination = os.path.join(path)
    Image.fromarray(img).save(destination)
def _relabel_mask(mask, old_start, new_start):
    """Shift the non-zero labels of ``mask`` from ``old_start`` to ``new_start``.

    Args:
        mask: Array of integer labels; 0 entries are left as 0.
        old_start: Starting label index the non-zero entries are relative to.
        new_start: Starting label index the result should be relative to.

    Returns:
        An int32 array of the same shape as ``mask``.
    """
    # Widen before doing arithmetic: shifting in the mask's own dtype (e.g.
    # uint8) can wrap around or raise when the shifted label or the offset
    # does not fit in that dtype.
    labels = np.asarray(mask).astype(np.int32)
    shifted = labels + (new_start - old_start)
    return np.where(labels != 0, shifted, 0).astype(np.int32)


# "<partition>/<imagenet class>" -> list of "<imagenet class>/<file stem>\n"
# entries, consumed when the .txt file lists are written.
fileList = {}
# Rewrite segmentation labels
for path, subdirs, files in os.walk(metaclass_dataset_dir):
    # Masks are laid out as <root>/<partition>/<metaclass>/<file>.png, so the
    # last two path components give the metaclass and the partition.
    className = os.path.basename(path)
    partition = os.path.basename(os.path.dirname(path))
    for name in files:
        # Test the extension itself; a substring test would also accept names
        # such as "mask.png.bak".
        if not name.endswith(".png"):
            continue
        imagenet_className = name.split("_")[0]
        # Read the pixel data inside the context manager so the file handle
        # is released as soon as the array has been built.
        with Image.open(os.path.join(path, name)) as mask_file:
            img = np.asarray(mask_file)
        new_img = _relabel_mask(
            img,
            class_starting_index[className],
            imagenet_class_starting_index[imagenet_className],
        )
        save_pil_image(
            new_img,
            os.path.join(
                imagenetclass_dataset_dir,
                partition,
                imagenet_className,
                name,
            ),
        )
        # Save filenames for .txt file
        fileList.setdefault(partition + "/" + imagenet_className, []).append(
            imagenet_className + "/" + name.split(".")[0] + "\n"
        )
# Step 6
# Write one sorted file list per "<partition>/<imagenet class>" key; every
# entry already ends with a newline.
for list_key, entries in fileList.items():
    list_path = f"{imagenetclass_dataset_dir}/{list_key}.txt"
    with open(list_path, "w") as list_file:
        list_file.writelines(sorted(entries))