-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathselective_search.py
178 lines (156 loc) · 8.46 KB
/
selective_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import cv2
import argparse
import xml.etree.ElementTree as ET
# python selective_search.py ./JPEGImages/000480.jpg ./Annotations/000480.xml color
def get_intersection_area(box1, box2):
"""
Calculates the intersection area of two bounding boxes where (x1,y1) indicates the top left corner and (x2,y2)
indicates the bottom right corner
:param box1: List of coordinates(x1,y1,x2,y2) of box1
:param box2: List of coordinates(x1,y1,x2,y2) of box2
:return: float: area of intersection of the two boxes
"""
x1 = max(box1[0], box2[0])
x2 = min(box1[2], box2[2])
y1 = max(box1[1], box2[1])
y2 = min(box1[3], box2[3])
# Check for the condition if there is no overlap between the bounding boxes (either height or width
# of intersection box are negative)
if (x2 - x1 < 0) or (y2 - y1 < 0):
return 0.0
else:
return (x2 - x1 + 1) * (y2 - y1 + 1)
def calculate_iou(proposal_boxes, gt_boxes):
"""
Returns the bounding boxes that have Intersection over Union (IOU) > 0.5 with the ground truth boxes
:param proposal_boxes: List of proposed bounding boxes(x1,y1,x2,y2) where (x1,y1) indicates the top left corner
and (x2,y2) indicates the bottom right corner of the proposed bounding box
:param gt_boxes: List of ground truth boxes(x1,y1,x2,y2) where (x1,y1) indicates the top left corner and (x2,y2)
indicates the bottom right corner of the ground truth box
:return iou_qualified_boxes: List of all proposed bounding boxes that have IOU > 0.5 with any of the ground
truth boxes
:return final_boxes: List of the best proposed bounding box with each of the ground truth box (if available)
"""
iou_qualified_boxes = []
final_boxes = []
for gt_box in gt_boxes:
best_box_iou = 0
best_box = 0
area_gt_box = (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1])
for prop_box in proposal_boxes:
area_prop_box = (prop_box[2] - prop_box[0] + 1) * (prop_box[3] - prop_box[1] + 1)
intersection_area = get_intersection_area(prop_box, gt_box)
union_area = area_prop_box + area_gt_box - intersection_area
iou = float(intersection_area) / float(union_area)
if iou > 0.5:
iou_qualified_boxes.append(prop_box)
if iou > best_box_iou:
best_box_iou = iou
best_box = prop_box
if best_box_iou != 0:
final_boxes.append(best_box)
return iou_qualified_boxes, final_boxes
def get_groundtruth_boxes(annoted_img_path):
"""
Parses the xml file of the annotated image to obtain the ground truth boxes
:param annoted_img_path: String: File path of the annotated image containing the ground truth
:return gt_boxes: List of ground truth boxes(x1,y1,x2,y2) where (x1,y1) indicates the top left corner and (x2,y2)
indicates the bottom right corner of the ground truth box
"""
gt_boxes = []
tree = ET.parse(annoted_img_path)
root = tree.getroot()
for items in root.findall('object/bndbox'):
xmin = items.find('xmin')
ymin = items.find('ymin')
xmax = items.find('xmax')
ymax = items.find('ymax')
gt_boxes.append([int(xmin.text), int(ymin.text), int(xmax.text), int(ymax.text)])
return gt_boxes
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("input_image_path", default="./HW2_Data/JPEGImages/000480.jpg", type=str,
help="Enter the image path")
parser.add_argument("annotated_image_path", default="./HW2_Data/Annotations/000480.xml", type=str,
help="Enter the annotated image path")
parser.add_argument("strategy", default="color", type=str,
help="Enter the strategy - color for color strategy, all for all strategies")
args = parser.parse_args()
img_path = args.input_image_path
annotated_img_path = args.annotated_image_path
img = cv2.imread(img_path)
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
# Convert image from BGR (default color in OpenCV) to RGB
rgb_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
ss.addImage(rgb_im)
gs = cv2.ximgproc.segmentation.createGraphSegmentation()
# gs.setK(150)
# gs.setSigma(0.8)
ss.addGraphSegmentation(gs)
# Creating strategy using color similarity
if args.strategy == "color":
strategy_color = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyColor()
ss.addStrategy(strategy_color)
# Creating strategy using all similarities (size,color,fill,texture)
elif args.strategy == "all":
strategy_color = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyColor()
strategy_fill = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyFill()
strategy_size = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategySize()
strategy_texture = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyTexture()
strategy_multiple = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyMultiple(
strategy_color, strategy_fill, strategy_size, strategy_texture)
ss.addStrategy(strategy_multiple)
get_boxes = ss.process()
print("Total proposals = ", len(get_boxes))
proposal_box_limit = 100
# Convert (x,y,w,h) parameters for the top 100 proposal boxes returned from ss.process() command into
# (x, y, x+w, y+h) parameters to be consistent with the xml tags of the ground truth boxes where
# (x,y) indicates the top left corner and (x+w,y+h) indicates the bottom right corner of bounding box
boxes = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in get_boxes[0:proposal_box_limit]]
output_img_proposal_top100 = img.copy()
output_img_iou_qualified = img.copy()
output_img_final = img.copy()
# Fetch all ground truth boxes from the annotated image file
gt_boxes = get_groundtruth_boxes(annotated_img_path)
print("Number of Ground Truth Boxes = ", len(gt_boxes))
# Draw bounding boxes for top 100 proposals
for i in range(0, len(boxes)):
top_x, top_y, bottom_x, bottom_y = boxes[i]
cv2.rectangle(output_img_proposal_top100, (top_x, top_y), (bottom_x, bottom_y), (0, 255, 0), 1, cv2.LINE_AA)
cv2.imshow("Output_Top_100_Proposals", output_img_proposal_top100)
cv2.imwrite("./Results/Output_Top_100_Proposals.png", output_img_proposal_top100)
cv2.waitKey()
cv2.destroyAllWindows()
# Fetch all proposed bounding boxes that have IOU > 0.5 with any of the ground truth boxes and also the bounding box
# that has the maximum/best overlap for each ground truth box
iou_qualified_boxes, final_boxes = calculate_iou(boxes, gt_boxes)
print("Number of Qualified Boxes with IOU > 0.5 = ", len(iou_qualified_boxes))
print("Qualified Boxes = ", iou_qualified_boxes)
# Draw bounding boxes for iou_qualified_boxes
for i in range(0, len(iou_qualified_boxes)):
top_x, top_y, bottom_x, bottom_y = iou_qualified_boxes[i]
cv2.rectangle(output_img_iou_qualified, (top_x, top_y), (bottom_x, bottom_y), (0, 255, 0), 1, cv2.LINE_AA)
for i in range(0, len(gt_boxes)):
top_x, top_y, bottom_x, bottom_y = gt_boxes[i]
cv2.rectangle(output_img_iou_qualified, (top_x, top_y), (bottom_x, bottom_y), (0, 0, 255), 1, cv2.LINE_AA)
cv2.imshow("Output_IOU_Qualified_Proposals", output_img_iou_qualified)
cv2.imwrite("./Results/Output_IOU_Qualified_Proposals.png", output_img_iou_qualified)
cv2.waitKey()
cv2.destroyAllWindows()
print("Number of final boxes = ", len(final_boxes))
print("Final boxes = ", final_boxes)
# Recall is calculated as the fraction of ground truth boxes that overlap with at least one proposal box with
# Intersection over Union (IoU) > 0.5
recall = len(final_boxes) / len(gt_boxes)
print("Recall = ", recall)
# Draw bounding boxes for final_boxes
for i in range(0, len(final_boxes)):
top_x, top_y, bottom_x, bottom_y = final_boxes[i]
cv2.rectangle(output_img_final, (top_x, top_y), (bottom_x, bottom_y), (0, 255, 0), 1, cv2.LINE_AA)
for i in range(0, len(gt_boxes)):
top_x, top_y, bottom_x, bottom_y = gt_boxes[i]
cv2.rectangle(output_img_final, (top_x, top_y), (bottom_x, bottom_y), (0, 0, 255), 1, cv2.LINE_AA)
cv2.imshow("Output_Final_Boxes", output_img_final)
cv2.imwrite("./Results/output_img_final.png", output_img_final)
cv2.waitKey()
cv2.destroyAllWindows()