-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathimage_anno_transforms.py
121 lines (92 loc) · 3.26 KB
/
image_anno_transforms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import random
from PIL import Image, ImageOps
import torchvision.transforms as transforms
def normalize_image():
"""Zero-mean input image for faster training."""
return transforms.Normalize(
mean=[0.5]*3,
std=[0.25]*3)
class RandomCropWithAnno:
"""
Crop the given PIL.Image at a random location along with annotations.
Do not drop or crop annotations.
Args:
pad_val: percent of image width or height to pad
"""
def __init__(self, pad_val):
self.pad_val = pad_val
def __call__(self, img, anno):
"""
Args:
img (PIL.Image): Image to be cropped.
anno: corresponding annotation.
Returns:
PIL.Image: Cropped image + annotation.
"""
ow, oh = img.size
border = tuple([int(ow*self.pad_val), int(oh*self.pad_val)] * 2)
img_padded = ImageOps.expand(img, border=border, fill=0)
w, h = img_padded.size
x1 = random.randint(0, w - ow)
y1 = random.randint(0, h - oh)
img_out = img_padded.crop((x1, y1, x1 + ow, y1 + oh))
anno_offs_x = border[0] - x1
anno_offs_y = border[1] - y1
for obj in anno:
bbox = obj['bbox']
bbox += [anno_offs_x, anno_offs_y] * 2
return img_out, anno
class RandomHorizontalFlipWithAnno:
"""Horizontally flip the given PIL.Image + annotation randomly with a probability of 0.5."""
def __call__(self, img, anno):
"""
Args:
img (PIL.Image): Image to be flipped.
anno: corresponding annotation.
Returns:
PIL.Image: Randomly flipped image + annotation.
"""
if random.random() < 0.5:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
width = img.width
for obj in anno:
bbox = obj['bbox']
new_left = width - bbox[2]
new_right = width - bbox[0]
bbox[0] = new_left
bbox[2] = new_right
return img, anno
class ComposeVariadic:
"""
Composes several transforms together. Processes lists of image + annotation + whatever.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, *args):
for t in self.transforms:
args = t(*args)
return args
class MapImageAndAnnoToInputWindow:
"""Transform to map any image + anno to network input format."""
def __init__(self, input_resolution):
self.input_resolution = input_resolution
self.transform = transforms.Compose([
transforms.Resize(input_resolution, interpolation=Image.BILINEAR),
transforms.ToTensor(),
normalize_image(),
])
def __call__(self, img, anno):
img_out = self.transform(img)
if anno is not None:
anno_out = []
for obj in anno:
obj_out = {
'type': obj['type'],
'bbox': obj['bbox'] / ([img.width, img.height] * 2) # left top right bottom
}
anno_out.append(obj_out)
else:
anno_out = None
return img_out, anno_out