-
Notifications
You must be signed in to change notification settings - Fork 0
/
left_right_consistency.py
167 lines (139 loc) · 8.26 KB
/
left_right_consistency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
This is the Python script version of the
confidence estimation technique.
Very little explanation is provided for the methods in this script.
For a detailed overview of this implementation, see the Jupyter
notebook with the same name.
"""
import argparse
import time as t
import cv2
import numpy as np
from sgm_numpy import sgm
from evaluation_funcs import calc_roc, calc_auc
from helper_funcs import filter_disparity, select_disparity, normalize, colorize_image
def _as_signed(disp):
    """Return ``disp`` as an array whose dtype cannot wrap around on subtraction."""
    disp = np.asarray(disp)
    # Unsigned (and boolean) arrays wrap around instead of going negative, which
    # would corrupt both the disparity difference and its negation below.
    if disp.dtype.kind in "ub":
        return disp.astype(np.int64)
    return disp


def left_right_consistency(left_disp, right_disp, csize, height, width, left_is_reference=True):
    """
    Creates a confidence map (H x W) using the left-right
    consistency technique from the left and right disparity maps (H x W).

    The confidence map is calculated for the left image by setting left_is_reference = True and
    for the right image by setting left_is_reference = False.

    For every pixel of the reference image, the disparity found at the matching
    pixel of the other image is looked up and compared with the pixel's own
    disparity. The negated absolute difference is then min-max normalized.

    Arguments:
        - left_disp: Array containing the left image disparities, with dimension H x W.
        - right_disp: Array containing the right image disparities, with dimension H x W.
        - csize: kernel size for the census transform; csize[0] rows at the top/bottom
            and csize[1] columns at the left/right are given a confidence of 0.
        - height: number of rows of the image.
        - width: number of columns of the image.
        - left_is_reference: Bool, specifying whether left or right image is the reference image.
            The confidence map will only be calculated for the reference image.

    Returns: Confidence map with normalized values (0-1) and
        dimensions H x W. If every pixel has the same consistency score the
        normalization is undefined; in that case every non-border pixel gets
        a confidence of 1 instead of NaN.
    """
    left_disp = _as_signed(left_disp)
    right_disp = _as_signed(right_disp)

    # Create an array containing the x-coords for each pixel
    x_coords = np.repeat(np.arange(width)[None, :], repeats=height, axis=0)

    if left_is_reference:
        # A left pixel at x matches the right pixel at x - d
        reference, other = left_disp, right_disp
        matched_x = x_coords - reference
    else:
        # A right pixel at x matches the left pixel at x + d
        reference, other = right_disp, left_disp
        matched_x = x_coords + reference

    # Keep the lookup inside the image on both sides so that an out-of-range
    # disparity can neither wrap around to the opposite border nor raise.
    matched_x = np.clip(matched_x, 0, width - 1)

    # Disparities of the other image sampled at the matched x coordinates
    other_at_match = np.take_along_axis(other, indices=matched_x, axis=1)

    # Calculate LRC: 0 is perfectly consistent, more negative is less consistent
    lrc = -np.abs(reference - other_at_match)

    # Normalize to get confidence values between 0-1
    minimum = np.min(lrc)
    maximum = np.max(lrc)
    value_range = maximum - minimum
    if value_range == 0:
        # All pixels are equally consistent; avoid the 0 / 0 division.
        confidence = np.ones(lrc.shape, dtype=np.float64)
    else:
        confidence = (lrc - minimum) / value_range

    # Set border pixels confidence to 0 (since disparity couldn't be calculated there).
    # A border of 0 must be skipped: the slice "-0:" would select the whole array.
    border_rows, border_cols = csize[0], csize[1]
    if border_rows > 0:
        confidence[:border_rows, :] = 0   # top border
        confidence[-border_rows:, :] = 0  # bottom border
    if border_cols > 0:
        confidence[:, :border_cols] = 0   # left border
        confidence[:, -border_cols:] = 0  # right border

    return confidence
def _str2bool(value):
    """Parse a command-line boolean such as ``True``, ``false``, ``1`` or ``no``."""
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    # The empty string is kept falsy, as it was with the previous ``type=bool``.
    if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def _read_grayscale(path):
    """Load ``path`` as a grayscale image, failing loudly if it cannot be read."""
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f'could not read image: {path}')
    return image


if __name__ == '__main__':
    # ------------------------------------------------------------------
    # Command-line interface
    # ------------------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument('--left', default='teddy/im2.png', help='name (path) to the left image')
    parser.add_argument('--right', default='teddy/im6.png', help='name (path) to the right image')
    parser.add_argument('--left_gt', default='teddy/disp2.png', help='name (path) to the left ground-truth image')
    parser.add_argument('--right_gt', default='teddy/disp6.png', help='name (path) to the right ground-truth image')
    parser.add_argument('--output', default='disparity_map.png', help='name of the output image')
    parser.add_argument('--disp', default=64, type=int, help='maximum disparity for the stereo pair')
    # type=bool would turn every non-empty string (including "False") into True,
    # so the flags are parsed explicitly. A bare "--save" / "--eval" means True.
    parser.add_argument('--save', default=False, type=_str2bool, nargs='?', const=True,
                        help='save disparity and confidence maps')
    parser.add_argument('--eval', default=True, type=_str2bool, nargs='?', const=True,
                        help='evaluate disparity map with 3 pixel error')
    parser.add_argument('--p1', default=10, type=int, help='penalty for disparity difference = 1')
    parser.add_argument('--p2', default=120, type=int, help='penalty for disparity difference > 1')
    parser.add_argument('--csize', default=[5, 5], nargs="+", type=int, help='size of the kernel for the census transform')
    parser.add_argument('--bsize', default=[3, 3], nargs="+", type=int, help='size of the kernel for blurring the images and median filtering')
    parser.add_argument('--tau', default=3, type=int, help='Used for D1 error rate: number of disparities with difference > tau')
    parser.add_argument('--conf_threshold', default=0.95, type=float, help='Retain disparitys with confidence greater than the conf_threshold')
    args = parser.parse_args()

    left_name = args.left
    right_name = args.right
    left_gt_name = args.left_gt
    right_gt_name = args.right_gt
    output_name = args.output
    save_images = args.save
    evaluation = args.eval
    max_disparity = args.disp
    P1 = args.p1
    P2 = args.p2
    csize = args.csize
    bsize = args.bsize
    tau = args.tau
    conf_threshold = args.conf_threshold

    # Validate cheap-to-check arguments before any expensive work is started.
    # Explicit checks are used because assert statements are removed under -O.
    if max_disparity <= 0:
        parser.error('maximum disparity must be greater than 0.')
    if len(csize) < 2:
        # left_right_consistency reads csize[0] and csize[1].
        parser.error('--csize needs two values (rows and columns).')

    dawn = t.time()

    # ------------------------------------------------------------------
    # Load the stereo pair
    # ------------------------------------------------------------------
    print('\nLoading images...')
    left = _read_grayscale(left_name)
    right = _read_grayscale(right_name)
    height = left.shape[0]
    width = left.shape[1]
    if left.shape[:2] != right.shape[:2]:
        raise ValueError('left & right must have the same shape.')

    # ------------------------------------------------------------------
    # Disparity estimation and confidence computation
    # ------------------------------------------------------------------
    print("\nPerforming SGM...")
    left_aggregation_volume, right_aggregation_volume = sgm(left, right, max_disparity, P1, P2, csize, bsize, height, width)
    left_disparity = select_disparity(left_aggregation_volume)
    right_disparity = select_disparity(right_aggregation_volume)

    print("\nCalculating confidences...")
    left_confidences = left_right_consistency(left_disparity, right_disparity, csize, height, width, left_is_reference=True)
    right_confidences = left_right_consistency(left_disparity, right_disparity, csize, height, width, left_is_reference=False)

    print('\nSelecting best disparities...')
    # The disparities selected above are reused instead of being recomputed
    # from the aggregation volumes.
    left_disparity_map = np.uint8(normalize(left_disparity, max_disparity))
    right_disparity_map = np.uint8(normalize(right_disparity, max_disparity))

    print('\nApplying median filter...')
    left_disparity_map = cv2.medianBlur(left_disparity_map, bsize[0])
    right_disparity_map = cv2.medianBlur(right_disparity_map, bsize[0])

    print("\nFiltering disparities...")
    left_filtered_disp = filter_disparity(left_disparity_map, left_confidences, conf_threshold)
    right_filtered_disp = filter_disparity(right_disparity_map, right_confidences, conf_threshold)

    # ------------------------------------------------------------------
    # Optional outputs
    # ------------------------------------------------------------------
    if save_images:
        cv2.imwrite(f'left_{output_name}', colorize_image(left_disparity_map, "jet"))
        cv2.imwrite(f'right_{output_name}', colorize_image(right_disparity_map, "jet"))
        # Confidences are in 0-1; scale them to 0-255 before colorizing.
        cv2.imwrite(f'left_conf_{output_name}', colorize_image(np.uint8(left_confidences * 255), "winter"))
        cv2.imwrite(f'right_conf_{output_name}', colorize_image(np.uint8(right_confidences * 255), "winter"))
        cv2.imwrite(f'left_filtered_{output_name}', colorize_image(left_filtered_disp, "jet"))
        cv2.imwrite(f'right_filtered_{output_name}', colorize_image(right_filtered_disp, "jet"))

    if evaluation:
        left_gt = _read_grayscale(left_gt_name)
        right_gt = _read_grayscale(right_gt_name)

        # Calculate ROC/optimal ROC curves (the sorted confidences and the pixel
        # ratios returned by calc_roc are not needed here)
        left_roc, left_optimal_roc, left_mean_error, _, _ = calc_roc(left_disparity_map, left_gt, left_confidences, tau)
        right_roc, right_optimal_roc, right_mean_error, _, _ = calc_roc(right_disparity_map, right_gt, right_confidences, tau)

        # Calculate AUC/optimal AUC
        left_auc, left_optimal_auc = calc_auc(left_roc, left_optimal_roc)
        right_auc, right_optimal_auc = calc_auc(right_roc, right_optimal_roc)

        print("\nLeft Results: ")
        print(f"\tTotal Error: {left_mean_error:.3f}, AUC: {left_auc:.3f}, Optimal AUC: {left_optimal_auc:.3f}")
        print("\nRight Results: ")
        print(f"\tTotal Error: {right_mean_error:.3f}, AUC: {right_auc:.3f}, Optimal AUC: {right_optimal_auc:.3f}")

    dusk = t.time()
    print('\nFinished')
    print('\nTotal execution time = {:.2f}s'.format(dusk - dawn))