-
Notifications
You must be signed in to change notification settings - Fork 0
/
process.py
99 lines (76 loc) · 3.41 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import torch
from skimage.io import imread
import numpy as np
# Mapping from semantic-segmentation class index to the RGBA color that
# encodes that class in the Unity simulator's segmentation images.
SEG_CLASS_TO_RGBA = {
    0: (0, 0, 255, 255),  # Water
    1: (55, 55, 55, 255),  # Ground
    2: (0, 255, 255, 255),  # Building
    3: (255, 212, 0, 255),  # Traffic items
    4: (0, 255, 0, 255),  # Vegetation
    5: (255, 97, 0, 255),  # Terrain
    6: (255, 0, 0, 255),  # Car
    7: (0, 0, 0, 0),  # Trees  -- NOTE(review): fully transparent, unlike every other class; confirm intentional
    8: (255, 0, 255, 255),  # Person
    9: (0, 0, 0, 255),  # Sky
    10: (255, 255, 255, 255),  # Default
}
# Inverse lookup: RGBA color tuple -> class index. Colors are unique per
# class above, so the inversion is lossless.
RGBA_TO_SEG_CLASS = {
    rgba_value: class_id for class_id, rgba_value in SEG_CLASS_TO_RGBA.items()
}
def load_unity_depth(unity_depth_path, far=1000):
    """Transforms the 3-channel encoded depth map from our Unity simulator
    to 1-channel depth map containing metric depth values.

    The depth is encoded in the following way:
    - The information from the simulator is (1 - LinearDepth (in [0,1])).
      far corresponds to the furthest distance to the camera included in the
      depth map.
      LinearDepth * far gives the real metric distance to the camera.
    - depth is first divided in 31 slices encoded in R channel with values ranging
      from 0 to 247
    - each slice is divided again in 31 slices, whose value is encoded in G channel
    - each of the G slices is divided into 256 slices, encoded in B channel

    In total, we have a discretization of depth into N = 31*31*256 - 1 possible
    values, covering a range of far/N meters.

    Note that, what we encode here is 1 - LinearDepth so that the furthest point
    is [0,0,0] (that is sky) and the closest point [255,255,255].

    The metric distance associated to a pixel whose depth is (R,G,B) is:
        d = (far/N) * [((247 - R)//8)*256*31 + ((247 - G)//8)*256 + (255 - B)]

    Args:
        unity_depth_path (str or pathlib.Path): Path to a single depth map from
            the dataset
        far (int, optional): Metric distance of the far plane. Defaults to 1000.

    Returns:
        numpy.ndarray: decoded metric depth with shape (1 x height x width).
    """
    depth_image = imread(unity_depth_path).astype(np.float32)
    R = depth_image[:, :, 0]
    G = depth_image[:, :, 1]
    B = depth_image[:, :, 2]

    # BUG FIX: the original called `.type(torch.IntTensor)` on these values,
    # but they are numpy arrays (sliced from an imread result) and `.type` is
    # a torch.Tensor method, so it raised AttributeError. `astype` performs
    # the same truncating int conversion (floor for these non-negative values).
    R = ((247 - R) / 8).astype(np.int32)
    G = ((247 - G) / 8).astype(np.int32)
    B = (255 - B).astype(np.int32)

    # Recombine the three slice indices into a single integer code, then
    # normalize by N = 256*31*31 - 1 to recover a value in [0, 1].
    metric_depth = (R * 256 * 31 + G * 256 + B).astype(np.float32) / (
        256 * 31 * 31 - 1
    )
    metric_depth = metric_depth * far
    # Add the leading channel dimension -> (1, H, W), as documented.
    metric_depth = metric_depth[None, ...]
    return metric_depth
def load_unity_segmap(unity_seg_path, default_value=10):
    """Load a segmentation RGBA image to a segmentation array with each
    pixel being the index of the class.

    Args:
        unity_seg_path (str or pathlib.Path): path to a segmentation RGBA image
            from the dataset
        default_value (int, optional): Class index for everything but known
            items. Defaults to 10.

    Returns:
        numpy.ndarray: array of size (1) x (H) x (W) with each pixel being the
            index of the class

    Raises:
        ValueError: if the image contains a color with no entry in
            RGBA_TO_SEG_CLASS.
    """
    seg_image = imread(unity_seg_path)
    pixel_values = np.unique(np.reshape(seg_image, (-1, seg_image.shape[-1])), axis=0)
    # Validate with an explicit exception instead of `assert`: asserts are
    # stripped under `python -O`, and listing the offending colors makes bad
    # segmentation maps much easier to debug.
    unknown = [tuple(p) for p in pixel_values if tuple(p) not in RGBA_TO_SEG_CLASS]
    if unknown:
        raise ValueError(
            f"Unknown RGBA values in {unity_seg_path}: {unknown}"
        )
    # Class indices are integers, so build an integer array (the original used
    # `np.ones * default_value`, yielding float64). With an int default_value
    # np.full infers an integer dtype; a float default still works as before.
    out = np.full((seg_image.shape[0], seg_image.shape[1]), default_value)
    for class_id, rgba_value in SEG_CLASS_TO_RGBA.items():
        # Mark every pixel whose full RGBA tuple matches this class's color.
        out[np.where((seg_image == rgba_value).all(-1))] = class_id
    return out[None, ...]