
Commit bd30631

Updated data.py to avoid hanging during dataset preprocessing; adjusted predict to use bbox NMS
1 parent 1c9a2a7 commit bd30631

File tree: 8 files changed, +118 −165 lines

Diff for: .gitignore

+5 −4

@@ -1,7 +1,8 @@
 __pycache__/
 .ipynb_checkpoints
 *.pyc
-datasets/
-output/
-input/
-models/
+datasets
+output
+input
+models
+*.jpeg
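(Dropping the trailing slashes makes each pattern match a file or a directory of that name rather than directories only, and the new `*.jpeg` rule ignores JPEG files anywhere in the tree.)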

Diff for: data.py

+76 −61
@@ -13,7 +13,8 @@
 import shutil
 from tqdm import tqdm
 from multiprocessing import Process, Queue
-
+import queue
+
 import pycocotools
 from detectron2.structures import BoxMode

@@ -32,20 +33,20 @@ def compute_bbox(mask):
 def to_coco(dataset_path):
     image_paths = [img_path for img_path in absolute_paths(os.path.join(dataset_path, 'images')) if img_path.endswith('.png')]
     #target_paths = absolute_paths(os.path.join(dataset_path, 'targets'))
-
+
     dataset_dicts = []
-
+
     for idx, image_path in enumerate(image_paths):
         target_path = os.path.join(dataset_path, 'targets', os.path.splitext(os.path.basename(image_path))[0] + '.pkl')
         with open(target_path, 'rb') as f:
             target = pickle.load(f)
-
+
         record = {}
         record['file_name'] = image_path
         record['image_id'] = idx
         record['height'] = target['size'][1]
         record['width'] = target['size'][0]
-
+
         objs = []
         for m in target['masks']:
             annotation = {'segmentation': pycocotools.mask.encode(np.asarray(m, order="F")),
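The hunk is truncated mid-dict by the diff view. For readers unfamiliar with detectron2's custom-dataset format, here is a minimal sketch of what one complete record presumably looks like; only the RLE `segmentation` key appears in this diff, while the `bbox`, `bbox_mode`, and `category_id` keys are assumptions based on the `BoxMode` import and detectron2's documented dataset-dict convention (the bbox values would come from `compute_bbox` in data.py):

```python
import numpy as np
import pycocotools.mask
from detectron2.structures import BoxMode

# Placeholder binary instance mask (in data.py these come from target['masks']).
mask = np.zeros((256, 256), dtype=np.uint8)
mask[100:150, 80:140] = 1

# COCO run-length encoding; pycocotools requires a Fortran-ordered uint8 array.
rle = pycocotools.mask.encode(np.asarray(mask, order="F"))
x0, y0, x1, y1 = 80, 100, 140, 150  # hypothetical output of compute_bbox(mask)

record = {
    'file_name': 'datasets/cells_train_256/images/00000.png',
    'image_id': 0,
    'height': 256,
    'width': 256,
    'annotations': [{
        'segmentation': rle,
        'bbox': [x0, y0, x1, y1],
        'bbox_mode': BoxMode.XYXY_ABS,
        'category_id': 0,
    }],
}
```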
@@ -68,7 +69,7 @@ def get_image_from_url(url):
         response = requests.get(url, stream=True)
         if response.status_code == 200:
             break
-        time.sleep(0.5)
+        time.sleep(2)
     response.raw.decode_content = True
     img = Image.open(BytesIO(response.content)).convert('RGB')
     return img
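The longer 2 s pause eases the polling of URLs that are not ready yet, presumably because each `instanceURI` mask image is rendered on demand by the labeling service. If the endpoint stays slow, a capped exponential backoff with a retry limit is a common refinement; a sketch of that variant, which is not what the commit does (`max_tries` and the delays are illustrative):

```python
import time
from io import BytesIO

import requests
from PIL import Image

def get_image_from_url(url, max_tries=8):
    # Retry with capped exponential backoff instead of a fixed 2 s sleep.
    for attempt in range(max_tries):
        response = requests.get(url)
        if response.status_code == 200:
            return Image.open(BytesIO(response.content)).convert('RGB')
        time.sleep(min(2 ** (attempt + 1), 30))  # 2 s, 4 s, 8 s, ... capped at 30 s
    raise RuntimeError(f'giving up on {url} after {max_tries} attempts')
```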
@@ -77,7 +78,7 @@ def augment(image, masks, crop_size):
     # Brightness
     brightness_factor = np.random.normal()*0.2 + 1
     image = TF.adjust_brightness(image, brightness_factor)
-
+
     # Contrast
     contrast_factor = np.random.normal()*0.2 + 1
     image = TF.adjust_contrast(image, contrast_factor)
@@ -89,26 +90,26 @@
     translate = np.random.randint(-30, 30, size=2).tolist()
     image = TF.affine(image, angle, translate, scale, shear, resample=PIL.Image.BILINEAR, fillcolor=None)
     masks = [TF.affine(mask, angle, translate, scale, shear, resample=PIL.Image.BILINEAR, fillcolor=None) for mask in masks]
-
+
     # Random crop
     i, j, h, w = transforms.RandomCrop.get_params(
         image, output_size=(crop_size, crop_size))
-
+
     image = TF.crop(image, i, j, h, w)
     masks = [TF.crop(mask, i, j, h, w) for mask in masks]
     # Random horizontal flipping
     if np.random.random() > 0.5:
         image = TF.hflip(image)
         masks = [TF.hflip(mask) for mask in masks]
-
+
     # Random vertical flipping
     if np.random.random() > 0.5:
         image = TF.vflip(image)
         masks = [TF.vflip(mask) for mask in masks]
-
+
     # squeeze and binarize
     masks = [(np.array(mask)[:, :, 0] > 0.5).astype(np.uint8) for mask in masks]
-
+
     # prune masks that have no object or only a sliver of an object
     masks = [mask for mask in masks if mask[10:-10, 10:-10].any()]
     return image, masks
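These hunks only normalize whitespace, but the function itself is worth a note: `TF` and `transforms` are almost certainly torchvision's functional and class-based transform APIs, and the same affine, crop, and flip parameters are applied to the image and every mask so the instances stay aligned. A minimal usage sketch (file paths are illustrative; masks are opened as RGB because `augment` indexes channel 0 when binarizing):

```python
from PIL import Image
from data import augment  # the module shown in this diff

img = Image.open('datasets/raw/image_part_001.jpg').convert('RGB')
masks = [Image.open(p).convert('RGB') for p in ('mask_0.png', 'mask_1.png')]

# One random 256x256 training sample: jittered, rotated, cropped, flipped.
sub_img, sub_masks = augment(img, masks, crop_size=256)
print(sub_img.size)                   # (256, 256) PIL image
print([m.shape for m in sub_masks])   # surviving masks as (256, 256) uint8 arrays
```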
@@ -122,76 +123,90 @@ def __init__(self, task_queue, result_queue, img, masks, out_path, crop_size):
         self.masks = masks
         self.out_path = out_path
         self.crop_size = crop_size
-
+
     def run(self):
         proc_name = self.name
-        while True:
-            index = self.task_queue.get()
-            if index == -1: break
-            sub_img, sub_masks = augment(self.img, self.masks, self.crop_size)
-            target = {'masks': sub_masks, 'size': sub_img.size}
-            save_mask_target(sub_img, target, f'{index:05d}', dataset_path=self.out_path)
-            self.result_queue.put(index)
-        return
-
+        while True:#not stopping.is_set():
+            try:
+                index = self.task_queue.get(True, 1)
+                sub_img, sub_masks = augment(self.img, self.masks, self.crop_size)
+                target = {'masks': sub_masks, 'size': sub_img.size}
+                save_mask_target(sub_img, target, f'{index:05d}', dataset_path=self.out_path)
+                self.result_queue.put(index)
+            except queue.Empty:
+                return
+
 def download_dataset(json_path, out_path, samples_per_img=100, num_threads=16, num_processes=4, selected_ids=None, crop_size=256):
 
     if os.path.exists(out_path):
         shutil.rmtree(out_path)
     os.makedirs(os.path.join(out_path, 'images'))
     os.makedirs(os.path.join(out_path, 'targets'))
-
-
+
+
     total_images = 0
     with open(json_path) as f:
         data = json.load(f)
-
+
     if selected_ids is not None:
         # Filter only selected images
         data = [img_obj for img_obj in data if img_obj['External ID'] in selected_ids]
 
     task_queue = Queue()
     result_queue = Queue()
-
-
+
     with tqdm(total=len(data)*samples_per_img) as pbar:
         for img_obj in data:
+
             img_url = img_obj['Labeled Data']
+            if 'objects' not in img_obj['Label']:
+                continue
+
             mask_urls = [instance['instanceURI'] for instance in img_obj['Label']['objects']]
 
             img = get_image_from_url(img_url)
             masks = list(ThreadPool(num_threads).imap_unordered(get_image_from_url, mask_urls))
-
+
+            for _ in range(samples_per_img):
+                task_queue.put(total_images)
+                total_images += 1
+
             workers = []
             for proc_index in range(num_processes):
                 p = Worker(task_queue, result_queue, img, masks, out_path, crop_size)
                 p.daemon = True
                 p.start()
                 workers.append(p)
-
-            for _ in range(samples_per_img):
-                task_queue.put(total_images)
-                total_images += 1
-
-            for index in range(samples_per_img):
-                i = result_queue.get()
-                pbar.update(1)
-
-            for index in range(num_processes):
-                task_queue.put(-1)
+
+
+
             for worker in workers:
-                worker.join()
+                worker.join(200)
+            for worker in workers:
+                if worker.is_alive():
+                    print("Process timed out")
+
+            pbar.update(samples_per_img)
+            # for index in range(samples_per_img):
+            #     while True:
+            #         try:
+            #             i = result_queue.get(True, 10)
+            #             pbar.update(1)
+            #         except queue.Empty:
+            #             break
+
 
-
 def main():
     ##########################
-    json_path = 'datasets/dataset_export_2020-08-18.json'
-    samples_per_img = 1 #300
+    json_path = 'datasets/export-2020-08-21T20 16 28.026Z.json'
+    samples_per_img = 300
     crop_size = 256
     ##########################
     print('download dataset')
-
-
+
+
     train_dataset = [
         'image_part_001.jpg',
         'image_part_002.jpg',
@@ -201,34 +216,34 @@ def main():
         'image_part_006.jpg',
         'image_part_007.jpg',
         'image_part_008.jpg',
-
-        #'MC171180.JPG',
-        #'MC171177.JPG',
-        #'MC171179.JPG',
-        #'MC171181.JPG',
-        #'MC171178.JPG',
-
+
+        'MC171180.JPG',
+        'MC171177.JPG',
+        'MC171179.JPG',
+        'MC171181.JPG',
+        'MC171178.JPG',
+
         '1758_part_004.jpg',
         '1758_part_011.jpg',
         '1758_part_008.jpg',
         '1755_part_004.jpg',
         '1730_part_009.jpg',
         '0467_part_010.jpg',
         '0467_part_003.jpg',
-        #'1773_train.JPG',
-        #'1133_train.JPG',
-        #'0576_train.JPG',
+        '1773_train.JPG',
+        '1133_train.JPG',
+        '0576_train.JPG',
     ]
-
-
-
+
+
+
     download_dataset(json_path,
                      'datasets/cells_train_256',
                      samples_per_img=samples_per_img,
                      selected_ids=train_dataset,
                      crop_size=crop_size,
-                     num_processes = 1,
-                     num_threads = 7)
+                     num_processes = 12,
+                     num_threads = 16)
 
 if __name__ == '__main__':
-    main()
+    main()
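This hunk is the heart of the hang fix: every task index is enqueued before any worker starts, each worker exits once `task_queue.get(True, 1)` raises `queue.Empty`, and the old `-1` sentinel plus the blocking `result_queue.get()` loop are gone, so nothing waits forever on a queue that will never be fed. A stripped-down, self-contained sketch of the same pattern (the squaring "work" is a placeholder for the augment-and-save step):

```python
import queue
from multiprocessing import Process, Queue

class Worker(Process):
    """Drains a pre-filled task queue; exits when the queue stays empty for 1 s."""

    def __init__(self, task_queue, result_queue):
        super().__init__()
        self.task_queue = task_queue
        self.result_queue = result_queue

    def run(self):
        while True:
            try:
                # Block at most 1 s. Because every task was enqueued before
                # any worker started, an empty queue can only mean the work
                # is done, so no -1 sentinel is needed.
                index = self.task_queue.get(True, 1)
            except queue.Empty:
                return
            self.result_queue.put(index * index)  # placeholder for real work

if __name__ == '__main__':
    tasks, results = Queue(), Queue()
    for i in range(100):
        tasks.put(i)  # fill the queue *before* starting any worker
    workers = [Worker(tasks, results) for _ in range(4)]
    for w in workers:
        w.daemon = True  # stragglers die with the parent instead of hanging it
        w.start()
    for w in workers:
        w.join(200)      # bounded join, mirroring the commit
    for w in workers:
        if w.is_alive():
            print('Process timed out')
    produced = 0
    while True:          # drain results; after the joins everything is flushed
        try:
            results.get_nowait()
            produced += 1
        except queue.Empty:
            break
    print(f'{produced} results produced')
```

One standing caveat with this pattern: a process that has put items on a `multiprocessing.Queue` may not terminate until those items are flushed to a consumer, so results should either be drained (as the now commented-out progress loop did) or kept small; the bounded `join(200)`, the `is_alive()` check, and `daemon = True` remain as a safety net either way.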

Diff for: datasets/cells_train_256/images/00000.png

-48 KB
Binary file not shown.

Diff for: datasets/cells_train_256/targets/00000.pkl

-41 Bytes
Binary file not shown.

Diff for: datasets/dataset_export_2020-08-18.json

-64
This file was deleted.
