# pre_prediction.py
"""
This script preprocesses the data before generating the masks by creating a
new dataset with mock masks in the test set.

1. Make the mock dataset directories.
2. Read the filenames of the ground-truth masks (to filter them out of the
   mock dataset) and the ImageNet folder names (to retrieve all relevant
   filenames).
3. Build the candidate list: relevant filenames that are not already among
   the ground-truth masks used to train the segmentation model.
4. Sample num_new_samples filenames from the candidate list.
5. Put all of the sampled masks into the test partition for pseudo-label
   generation.
6. Make the directory that the predicted masks will be written to.
"""
import argparse
import os
import random

import numpy as np
from PIL import Image
# Initialize parser
parser = argparse.ArgumentParser()
# Optional arguments
parser.add_argument(
    "--old-dataset",
    help="path of old dataset",
    default="/data/kornrapatp/PartImageNet/PartSegmentations/All",
    type=str,
)
parser.add_argument(
    "--new-dataset",
    help="path of new dataset",
    default="/data/kornrapatp/PartImageNet/PartSegmentations/All-step3-all-part-new",
    type=str,
)
parser.add_argument(
    "--jpeg-path",
    help="path of jpegs",
    default="/data/shared/train_blurred/Images",
    type=str,
)
parser.add_argument(
    "--num-new-samples",
    help="number of new samples to add to dataset",
    default=24000,
    type=int,
)
parser.add_argument(
    "--prediction-path",
    help="path of predicted masks",
    default="/data/kornrapatp/test",
    type=str,
)
# Read arguments from command line
args = parser.parse_args()
jpeg_path = args.jpeg_path
old_dataset_path = args.old_dataset
new_temp_mask_dataset_path = args.new_dataset + "-mask"
new_dataset_path = args.new_dataset
num_new_samples = args.num_new_samples
prediction_path = args.prediction_path
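# Derived layout: the "-mask" suffix directory holds the split lists and mock
# masks built below, while prediction_path will receive the predicted masks.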
# Metaclasses of the dataset; each maps to the set of ImageNet synset folders
# discovered for it while scanning the ground-truth masks in Step 2.
classes = {
    "Aeroplane": set(),
    "Quadruped": set(),
    "Biped": set(),
    "Fish": set(),
    "Bird": set(),
    "Snake": set(),
    "Reptile": set(),
    "Car": set(),
    "Bicycle": set(),
    "Boat": set(),
    "Bottle": set(),
}
# Step 1
# Make the mock dataset directories
os.mkdir(new_temp_mask_dataset_path)
for partition in ["train", "val", "test"]:
    os.mkdir(new_temp_mask_dataset_path + "/" + partition)
    for c in classes.keys():
        os.mkdir(new_temp_mask_dataset_path + "/" + partition + "/" + c)
# Step 2
# Collect all samples from the current 1x dataset so they can be ignored
# during sample generation
old_dataset = set()
count = 0
train_val_count = 0
for path, subdirs, files in os.walk(old_dataset_path):
    for name in files:
        if ".png" in name:
            if "train" in path or "val" in path:
                train_val_count += 1
            count += 1
            old_dataset.add(name.split(".")[0])
            # Masks live under <partition>/<metaclass>/, and their filenames
            # start with the ImageNet synset folder name.
            metaclass = path.split("/")[-1]
            classes[metaclass].add(name.split("_")[0])
# Create folder-to-class mapping
folder_to_class = {}
for k, v in classes.items():
    for folder in v:
        folder_to_class[folder] = k
# Step 3
# Get candidate list
count = 0
candidates = []
for folder, className in folder_to_class.items():
    print(folder)
    for path, subdirs, files in os.walk(f"{jpeg_path}/{folder}"):
        for name in files:
            count += 1
            if name.split(".")[0] not in old_dataset:
                candidates.append(className + "-" + name.split(".")[0])
# Step 4
# Randomly shuffle the candidates and keep only num_new_samples of them
random.shuffle(candidates)
print(len(candidates))
candidates = candidates[:num_new_samples]
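# Note: random.shuffle is unseeded here, so the sampled subset differs
# between runs; call random.seed(<fixed value>) before shuffling if a
# reproducible split is needed.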
# Step 5
# Put all of our candidates into the test split lists so masks can be
# generated for them in the next step; train/val lists are left empty.
for c in classes.keys():
    for partition in ["train", "val", "test"]:
        if partition == "test":
            # Candidates are stored as "<class>-<synset>_<id>"; keep the
            # filename part. Matching on the class prefix avoids accidental
            # substring matches between class names.
            class_candidate = [
                candi.split("-")[1]
                for candi in candidates
                if candi.split("-")[0] == c
            ]
            class_candidate.sort()
            with open(
                f"{new_temp_mask_dataset_path}/{partition}/{c}.txt", "w"
            ) as f:
                for candi in class_candidate:
                    f.write(candi.split("_")[0] + "/" + candi)
                    f.write("\n")
        else:
            # Create empty train/val split lists
            with open(
                f"{new_temp_mask_dataset_path}/{partition}/{c}.txt", "w"
            ) as f:
                pass
# Create a .png file of the correct dimensions for each test sample
def save_pil_image(img, path):
    pil_img = Image.fromarray(img)
    pil_img.save(path)
for c in classes.keys():
    with open(f"{new_temp_mask_dataset_path}/test/{c}.txt") as f:
        filenames = f.readlines()
    for name in filenames:
        name = name.split("/")[-1].rstrip("\n")
        img = Image.open(f'{jpeg_path}/{name.split("_")[0]}/{name}.JPEG')
        # PIL reports size as (width, height) while numpy expects
        # (height, width); use uint8 so the all-zero mock mask can be
        # saved as a PNG.
        mock_mask = np.zeros((img.size[1], img.size[0]), dtype=np.uint8)
        print(img.size)
        save_pil_image(
            mock_mask, f"{new_temp_mask_dataset_path}/test/{c}/{name}.png"
        )
# Step 6
# Make the directory that the predicted masks will be written to
os.mkdir(prediction_path)
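# Optional sanity check after running (shell one-liner; the path is the
# --new-dataset value with the "-mask" suffix): the mock-mask count should
# equal the number of sampled candidates.
#   find <new-dataset>-mask/test -name '*.png' | wc -l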