13
13
import shutil
14
14
from tqdm import tqdm
15
15
from multiprocessing import Process , Queue
16
-
16
+ import queue
17
+
17
18
import pycocotools
18
19
from detectron2 .structures import BoxMode
19
20
@@ -32,20 +33,20 @@ def compute_bbox(mask):
32
33
def to_coco (dataset_path ):
33
34
image_paths = [img_path for img_path in absolute_paths (os .path .join (dataset_path , 'images' )) if img_path .endswith ('.png' )]
34
35
#target_paths = absolute_paths(os.path.join(dataset_path, 'targets'))
35
-
36
+
36
37
dataset_dicts = []
37
-
38
+
38
39
for idx , image_path in enumerate (image_paths ):
39
40
target_path = os .path .join (dataset_path , 'targets' , os .path .splitext (os .path .basename (image_path ))[0 ] + '.pkl' )
40
41
with open (target_path , 'rb' ) as f :
41
42
target = pickle .load (f )
42
-
43
+
43
44
record = {}
44
45
record ['file_name' ] = image_path
45
46
record ['image_id' ] = idx
46
47
record ['height' ] = target ['size' ][1 ]
47
48
record ['width' ] = target ['size' ][0 ]
48
-
49
+
49
50
objs = []
50
51
for m in target ['masks' ]:
51
52
annotation = {'segmentation' : pycocotools .mask .encode (np .asarray (m , order = "F" )),
@@ -68,7 +69,7 @@ def get_image_from_url(url):
68
69
response = requests .get (url , stream = True )
69
70
if response .status_code == 200 :
70
71
break
71
- time .sleep (0.5 )
72
+ time .sleep (2 )
72
73
response .raw .decode_content = True
73
74
img = Image .open (BytesIO (response .content )).convert ('RGB' )
74
75
return img
@@ -77,7 +78,7 @@ def augment(image, masks, crop_size):
77
78
# Brightness
78
79
brightness_factor = np .random .normal ()* 0.2 + 1
79
80
image = TF .adjust_brightness (image , brightness_factor )
80
-
81
+
81
82
# Contrast
82
83
contrast_factor = np .random .normal ()* 0.2 + 1
83
84
image = TF .adjust_contrast (image , contrast_factor )
@@ -89,26 +90,26 @@ def augment(image, masks, crop_size):
89
90
translate = np .random .randint (- 30 , 30 , size = 2 ).tolist ()
90
91
image = TF .affine (image , angle , translate , scale , shear , resample = PIL .Image .BILINEAR , fillcolor = None )
91
92
masks = [TF .affine (mask , angle , translate , scale , shear , resample = PIL .Image .BILINEAR , fillcolor = None ) for mask in masks ]
92
-
93
+
93
94
# Random crop
94
95
i , j , h , w = transforms .RandomCrop .get_params (
95
96
image , output_size = (crop_size , crop_size ))
96
-
97
+
97
98
image = TF .crop (image , i , j , h , w )
98
99
masks = [TF .crop (mask , i , j , h , w ) for mask in masks ]
99
100
# Random horizontal flipping
100
101
if np .random .random () > 0.5 :
101
102
image = TF .hflip (image )
102
103
masks = [TF .hflip (mask ) for mask in masks ]
103
-
104
+
104
105
# Random vertical flipping
105
106
if np .random .random () > 0.5 :
106
107
image = TF .vflip (image )
107
108
masks = [TF .vflip (mask ) for mask in masks ]
108
-
109
+
109
110
# squeeze and binarize
110
111
masks = [(np .array (mask )[:, :, 0 ] > 0.5 ).astype (np .uint8 ) for mask in masks ]
111
-
112
+
112
113
# prune masks that have no object or only a sliver of an object
113
114
masks = [mask for mask in masks if mask [10 :- 10 , 10 :- 10 ].any ()]
114
115
return image , masks
@@ -122,76 +123,90 @@ def __init__(self, task_queue, result_queue, img, masks, out_path, crop_size):
122
123
self .masks = masks
123
124
self .out_path = out_path
124
125
self .crop_size = crop_size
125
-
126
+
126
127
def run(self):
    """Consume augmentation jobs from the task queue until it stays empty.

    Each job is an integer sample index pulled from ``self.task_queue``.
    For every index the worker produces one augmented (image, masks) pair
    via ``augment`` and saves it under ``self.out_path``; the finished
    index is reported on ``self.result_queue``. The worker exits once no
    task arrives within the 1-second poll timeout, which doubles as the
    shutdown signal (no explicit poison pill is used).
    """
    while True:
        try:
            # Block for at most 1 s; a queue that stays empty means all
            # work has been handed out, so the worker shuts down.
            index = self.task_queue.get(True, 1)
        except queue.Empty:
            return
        # Keep the try-body minimal: only the queue poll can signal
        # shutdown; failures in augmentation/saving should propagate,
        # exactly as in the original control flow.
        sub_img, sub_masks = augment(self.img, self.masks, self.crop_size)
        target = {'masks': sub_masks, 'size': sub_img.size}
        # Zero-padded index keeps output files lexicographically ordered.
        save_mask_target(sub_img, target, f'{index:05d}', dataset_path=self.out_path)
        self.result_queue.put(index)
138
+
137
139
def download_dataset(json_path, out_path, samples_per_img=100, num_threads=16, num_processes=4, selected_ids=None, crop_size=256):
    """Download labeled images and build an augmented crop dataset.

    Reads an annotation export at ``json_path`` (presumably a Labelbox
    export: entries carry 'Labeled Data', 'External ID' and
    'Label'->'objects' keys — TODO confirm against the export format),
    downloads each image and its instance masks, and writes
    ``samples_per_img`` random augmented crops per image into
    ``out_path`` via a pool of Worker processes.

    json_path:        path to the JSON annotation export.
    out_path:         output dataset root; WIPED AND RECREATED on every call.
    samples_per_img:  number of augmented crops generated per source image.
    num_threads:      thread-pool size for parallel mask downloads.
    num_processes:    Worker processes spawned per source image.
    selected_ids:     optional whitelist of 'External ID' values to keep.
    crop_size:        side length of the square crops handed to Worker.
    """
    # Start from a clean slate: any previous dataset at out_path is deleted.
    if os.path.exists(out_path):
        shutil.rmtree(out_path)
    os.makedirs(os.path.join(out_path, 'images'))
    os.makedirs(os.path.join(out_path, 'targets'))

    # total_images is a running global sample index: it keeps file names
    # unique across all source images, not just within one image.
    total_images = 0
    with open(json_path) as f:
        data = json.load(f)

    if selected_ids is not None:
        # Filter only selected images
        data = [img_obj for img_obj in data if img_obj['External ID'] in selected_ids]

    task_queue = Queue()
    result_queue = Queue()

    with tqdm(total=len(data)*samples_per_img) as pbar:
        for img_obj in data:

            img_url = img_obj['Labeled Data']
            # Skip entries with no annotated objects at all.
            if 'objects' not in img_obj['Label']:
                continue

            mask_urls = [instance['instanceURI'] for instance in img_obj['Label']['objects']]

            img = get_image_from_url(img_url)

            # Masks are fetched concurrently; order does not matter downstream.
            masks = list(ThreadPool(num_threads).imap_unordered(get_image_from_url, mask_urls))

            # Enqueue one job per desired crop BEFORE starting workers, so
            # workers that time out on an empty queue exit cleanly.
            for _ in range(samples_per_img):
                task_queue.put(total_images)
                total_images += 1

            # A fresh set of workers is spawned for every source image
            # because each Worker holds the image/masks in its constructor.
            workers = []
            for proc_index in range(num_processes):
                p = Worker(task_queue, result_queue, img, masks, out_path, crop_size)
                p.daemon = True
                p.start()
                workers.append(p)

            # NOTE(review): result_queue is filled by workers but never
            # drained here; for large samples_per_img the underlying pipe
            # buffer could fill and block workers on put(), which would
            # then trip the join timeout below — confirm. The commented
            # loop at the bottom was the previous drain/progress path.
            for worker in workers:
                worker.join(200)
            for worker in workers:
                if worker.is_alive():
                    print("Process timed out")

            # Optimistically counts all samples as done, even if a worker
            # timed out above and produced fewer crops.
            pbar.update(samples_per_img)
            # for index in range(samples_per_img):
            #     while True:
            #         try:
            #             i = result_queue.get(True, 10)
            #             pbar.update(1)
            #         except queue.Empty:
            #             break
199
+
184
200
185
-
186
201
def main ():
187
202
##########################
188
- json_path = 'datasets/dataset_export_2020- 08-18 .json'
189
- samples_per_img = 1 # 300
203
+ json_path = 'datasets/export-2020- 08-21T20 16 28.026Z .json'
204
+ samples_per_img = 300
190
205
crop_size = 256
191
206
##########################
192
207
print ('download dataset' )
193
-
194
-
208
+
209
+
195
210
train_dataset = [
196
211
'image_part_001.jpg' ,
197
212
'image_part_002.jpg' ,
@@ -201,34 +216,34 @@ def main():
201
216
'image_part_006.jpg' ,
202
217
'image_part_007.jpg' ,
203
218
'image_part_008.jpg' ,
204
-
205
- # 'MC171180.JPG',
206
- # 'MC171177.JPG',
207
- # 'MC171179.JPG',
208
- # 'MC171181.JPG',
209
- # 'MC171178.JPG',
210
-
219
+
220
+ 'MC171180.JPG' ,
221
+ 'MC171177.JPG' ,
222
+ 'MC171179.JPG' ,
223
+ 'MC171181.JPG' ,
224
+ 'MC171178.JPG' ,
225
+
211
226
'1758_part_004.jpg' ,
212
227
'1758_part_011.jpg' ,
213
228
'1758_part_008.jpg' ,
214
229
'1755_part_004.jpg' ,
215
230
'1730_part_009.jpg' ,
216
231
'0467_part_010.jpg' ,
217
232
'0467_part_003.jpg' ,
218
- # '1773_train.JPG',
219
- # '1133_train.JPG',
220
- # '0576_train.JPG',
233
+ '1773_train.JPG' ,
234
+ '1133_train.JPG' ,
235
+ '0576_train.JPG' ,
221
236
]
222
-
223
-
224
-
237
+
238
+
239
+
225
240
download_dataset (json_path ,
226
241
'datasets/cells_train_256' ,
227
242
samples_per_img = samples_per_img ,
228
243
selected_ids = train_dataset ,
229
244
crop_size = crop_size ,
230
- num_processes = 1 ,
231
- num_threads = 7 )
245
+ num_processes = 12 ,
246
+ num_threads = 16 )
232
247
233
248
# Run the dataset download/augmentation pipeline when executed as a script.
if __name__ == '__main__':
    main()
0 commit comments