import numpy as np
import json
import imageio
+ from glob import glob
+ import cv2 as cv
from pathlib import Path
from plyfile import PlyData, PlyElement
from utils.sh_utils import SH2RGB
from scene.gaussian_model import BasicPointCloud

+
class CameraInfo(NamedTuple):
    uid: int
    R: np.array
@@ -35,7 +38,8 @@ class CameraInfo(NamedTuple):
    image_name: str
    width: int
    height: int
-     depth: np.array
+     fid: int
+

class SceneInfo(NamedTuple):
    point_cloud: BasicPointCloud
@@ -44,6 +48,29 @@ class SceneInfo(NamedTuple):
    nerf_normalization: dict
    ply_path: str

+
+ def load_K_Rt_from_P(filename, P=None):
+     if P is None:
+         lines = open(filename).read().splitlines()
+         if len(lines) == 4:
+             lines = lines[1:]
+         lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
+         P = np.asarray(lines).astype(np.float32).squeeze()
+
+     out = cv.decomposeProjectionMatrix(P)
+     K = out[0]
+     R = out[1]
+     t = out[2]
+
+     K = K / K[2, 2]
+
+     pose = np.eye(4, dtype=np.float32)
+     pose[:3, :3] = R.transpose()
+     pose[:3, 3] = (t[:3] / t[3])[:, 0]
+
+     return K, pose
+
+
def getNerfppNorm(cam_info):
    def get_center_and_diag(cam_centers):
        cam_centers = np.hstack(cam_centers)
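Note on the new helper: load_K_Rt_from_P mirrors the helper used in NeuS/IDR-style datasets. It splits a 3x4 projection matrix P = K[R|t] into intrinsics (normalized so K[2, 2] = 1) and a 4x4 camera-to-world pose via cv.decomposeProjectionMatrix. A minimal round-trip sketch of what it returns, with made-up numbers:

    import numpy as np
    import cv2 as cv

    # Synthetic intrinsics and world-to-camera extrinsics (illustrative values only)
    K_true = np.array([[1100., 0., 320.],
                       [0., 1100., 240.],
                       [0., 0., 1.]], dtype=np.float32)
    R_wc = np.eye(3, dtype=np.float32)
    t_wc = np.array([0.1, -0.2, 2.0], dtype=np.float32)
    P = K_true @ np.concatenate([R_wc, t_wc[:, None]], axis=1)   # 3x4 projection matrix

    K, pose = load_K_Rt_from_P(None, P)
    # K equals K_true up to the K[2, 2] normalization; pose is camera-to-world:
    # pose[:3, :3] == R_wc.T and pose[:3, 3] == -R_wc.T @ t_wc (the camera center)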
@@ -67,12 +94,13 @@ def get_center_and_diag(cam_centers):

    return {"translate": translate, "radius": radius}

+
def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
    cam_infos = []
    for idx, key in enumerate(cam_extrinsics):
        sys.stdout.write('\r')
        # the exact output you're looking for:
-         sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics)))
+         sys.stdout.write("Reading camera {}/{}".format(idx + 1, len(cam_extrinsics)))
        sys.stdout.flush()

        extr = cam_extrinsics[key]
@@ -84,11 +112,11 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
        R = np.transpose(qvec2rotmat(extr.qvec))
        T = np.array(extr.tvec)

-         if intr.model=="SIMPLE_PINHOLE":
+         if intr.model == "SIMPLE_PINHOLE":
            focal_length_x = intr.params[0]
            FovY = focal2fov(focal_length_x, height)
            FovX = focal2fov(focal_length_x, width)
-         elif intr.model=="PINHOLE":
+         elif intr.model == "PINHOLE":
            focal_length_x = intr.params[0]
            focal_length_y = intr.params[1]
            FovY = focal2fov(focal_length_y, height)
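The hunk above only re-spaces the intrinsics handling; for reference, the FoV values come from the COLMAP focal lengths via focal2fov, which in the stock 3DGS utils (an assumption if this fork changed it) is just 2 * arctan(pixels / (2 * focal)). A quick sketch with made-up numbers:

    import math

    def focal2fov(focal, pixels):          # as in utils/graphics_utils.py upstream
        return 2 * math.atan(pixels / (2 * focal))

    # A 1600 px wide PINHOLE camera with fx = 1111.1 px:
    fovx = focal2fov(1111.1, 1600)         # ~1.25 rad
    print(math.degrees(fovx))              # ~71.5 degrees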
@@ -106,6 +134,7 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
    sys.stdout.write('\n')
    return cam_infos

+
def fetchPly(path):
    plydata = PlyData.read(path)
    vertices = plydata['vertex']
@@ -114,12 +143,13 @@ def fetchPly(path):
    normals = np.vstack([vertices['nx'], vertices['ny'], vertices['nz']]).T
    return BasicPointCloud(points=positions, colors=colors, normals=normals)

+
def storePly(path, xyz, rgb):
    # Define the dtype for the structured array
    dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
-             ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
-             ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')]
-
+              ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
+              ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')]
+
    normals = np.zeros_like(xyz)

    elements = np.empty(xyz.shape[0], dtype=dtype)
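storePly writes positions, zeroed normals, and 8-bit colours into a binary PLY that fetchPly reads back as a BasicPointCloud (colours rescaled to [0, 1] in the stock implementation). A small round-trip sketch; the file path is only an example:

    import numpy as np

    xyz = np.random.random((1000, 3)) * 2.6 - 1.3      # points in [-1.3, 1.3]^3
    rgb = np.random.random((1000, 3)) * 255             # colours in the 0-255 range
    storePly("/tmp/points3d.ply", xyz, rgb)

    pcd = fetchPly("/tmp/points3d.ply")
    assert pcd.points.shape == (1000, 3)
    assert np.allclose(pcd.normals, 0)                   # normals are written as zeros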
@@ -131,6 +161,7 @@ def storePly(path, xyz, rgb):
    ply_data = PlyData([vertex_element])
    ply_data.write(path)

+
def readColmapSceneInfo(path, images, eval, llffhold=8):
    try:
        cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin")
@@ -144,8 +175,9 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
        cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file)

    reading_dir = "images" if images == None else images
-     cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir))
-     cam_infos = sorted(cam_infos_unsorted.copy(), key=lambda x: x.image_name)
+     cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics,
+                                            images_folder=os.path.join(path, reading_dir))
+     cam_infos = sorted(cam_infos_unsorted.copy(), key=lambda x: x.image_name)

    if eval:
        train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold != 0]
@@ -178,6 +210,7 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
                           ply_path=ply_path)
    return scene_info

+
def readCamerasFromTransforms(path, transformsfile, white_background, extension=".png"):
    cam_infos = []

@@ -194,8 +227,8 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension=
            depth_name = os.path.join(path, frame["file_path"] + "_depth0000" + '.exr')

            matrix = np.linalg.inv(np.array(frame["transform_matrix"]))
-             R = -np.transpose(matrix[:3,:3])
-             R[:,0] = -R[:,0]
+             R = -np.transpose(matrix[:3, :3])
+             R[:, 0] = -R[:, 0]
            T = -matrix[:3, 3]

            image_path = os.path.join(path, cam_name)
@@ -205,27 +238,29 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension=

            im_data = np.array(image.convert("RGBA"))

-             bg = np.array([1,1, 1]) if white_background else np.array([0, 0, 0])
+             bg = np.array([1, 1, 1]) if white_background else np.array([0, 0, 0])

            norm_data = im_data / 255.0
-             arr = norm_data[:,:, :3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4])
-             image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB")
+             arr = norm_data[:, :, :3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4])
+             image = Image.fromarray(np.array(arr * 255.0, dtype=np.byte), "RGB")

            fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1])
-             FovY = fovx
+             FovY = fovx
            FovX = fovy

            cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
-                                         image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1], depth=depth))
-
+                                         image_path=image_path, image_name=image_name, width=image.size[0],
+                                         height=image.size[1], depth=depth))
+
    return cam_infos

+
def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
    print("Reading Training Transforms")
    train_cam_infos = readCamerasFromTransforms(path, "transforms_train.json", white_background, extension)
    print("Reading Test Transforms")
    test_cam_infos = readCamerasFromTransforms(path, "transforms_test.json", white_background, extension)
-
+
    if not eval:
        train_cam_infos.extend(test_cam_infos)
        test_cam_infos = []
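In the hunk above the RGBA frames are still composited onto a solid background with arr = rgb * alpha + bg * (1 - alpha) before being converted back to 8-bit RGB. For a single pixel the formula behaves as follows (illustrative values):

    import numpy as np

    rgb = np.array([1.0, 0.0, 0.0])        # fully red foreground colour
    alpha = 0.5                            # half transparent
    bg = np.array([1.0, 1.0, 1.0])         # white_background=True
    out = rgb * alpha + bg * (1 - alpha)   # -> [1.0, 0.5, 0.5], a pale red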
@@ -237,7 +272,7 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
        # Since this data set has no colmap data, we start with random points
        num_pts = 100_000
        print(f"Generating random point cloud ({num_pts})...")
-
+
        # We create random points inside the bounds of the synthetic Blender scenes
        xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3
        shs = np.random.random((num_pts, 3)) / 255.0
@@ -256,7 +291,93 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
                           ply_path=ply_path)
    return scene_info

+
+ def readDTUCameras(path, render_camera, object_camera):
+     camera_dict = np.load(os.path.join(path, render_camera))
+     images_lis = sorted(glob(os.path.join(path, 'image/*.png')))
+     masks_lis = sorted(glob(os.path.join(path, 'mask/*.png')))
+     n_images = len(images_lis)
+     cam_infos = []
+     for idx, image_path in enumerate(images_lis):
+         image = np.array(Image.open(image_path))
+         mask = np.array(imageio.imread(masks_lis[idx])) / 255.0
+         image = Image.fromarray((image * mask).astype(np.uint8))
+         world_mat = camera_dict['world_mat_%d' % idx].astype(np.float32)
+         fid = camera_dict['fid_%d' % idx]
+         image_name = Path(image_path).stem
+         scale_mat = camera_dict['scale_mat_%d' % idx].astype(np.float32)
+         P = world_mat @ scale_mat
+         P = P[:3, :4]
+
+         K, pose = load_K_Rt_from_P(None, P)
+         a = pose[0:1, :]
+         b = pose[1:2, :]
+         c = pose[2:3, :]
+
+         pose = np.concatenate([a, -c, -b, pose[3:, :]], 0)
+
+         S = np.eye(3)
+         S[1, 1] = -1
+         S[2, 2] = -1
+         pose[1, 3] = -pose[1, 3]
+         pose[2, 3] = -pose[2, 3]
+         pose[:3, :3] = S @ pose[:3, :3] @ S
+
+         a = pose[0:1, :]
+         b = pose[1:2, :]
+         c = pose[2:3, :]
+
+         pose = np.concatenate([a, c, b, pose[3:, :]], 0)
+
+         pose[:, 3] *= 0.5
+
+         matrix = np.linalg.inv(pose)
+         R = -np.transpose(matrix[:3, :3])
+         R[:, 0] = -R[:, 0]
+         T = -matrix[:3, 3]
+
+         FovY = focal2fov(K[0, 0], image.size[1])
+         FovX = focal2fov(K[0, 0], image.size[0])
+         cam_info = CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
+                               image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1], fid=fid)
+         cam_infos.append(cam_info)
+     sys.stdout.write('\n')
+     return cam_infos
+
+
+ def readNeuSDTUInfo(path, render_camera, object_camera):
+     print("Reading DTU Info")
+     train_cam_infos = readDTUCameras(path, render_camera, object_camera)
+
+     nerf_normalization = getNerfppNorm(train_cam_infos)
+
+     ply_path = os.path.join(path, "points3d.ply")
+     if not os.path.exists(ply_path):
+         # Since this data set has no colmap data, we start with random points
+         num_pts = 100_000
+         print(f"Generating random point cloud ({num_pts})...")
+
+         # We create random points inside the bounds of the synthetic Blender scenes
+         xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3
+         shs = np.random.random((num_pts, 3)) / 255.0
+         pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3)))
+
+         storePly(ply_path, xyz, SH2RGB(shs) * 255)
+     try:
+         pcd = fetchPly(ply_path)
+     except:
+         pcd = None
+
+     scene_info = SceneInfo(point_cloud=pcd,
+                            train_cameras=train_cam_infos,
+                            test_cameras=[],
+                            nerf_normalization=nerf_normalization,
+                            ply_path=ply_path)
+     return scene_info
+
+
sceneLoadTypeCallbacks = {
    "Colmap": readColmapSceneInfo,
-     "Blender" : readNerfSyntheticInfo
- }
+     "Blender": readNerfSyntheticInfo,
+     "DTU": readNeuSDTUInfo,
+ }
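With the new "DTU" entry, a caller can dispatch to the NeuS-style loader the same way the Colmap and Blender branches are selected elsewhere in the repo. A hedged usage sketch; the cameras_sphere.npz file name and the data path are assumptions borrowed from the NeuS data layout, not something this diff defines:

    # Illustrative only; readNeuSDTUInfo(path, render_camera, object_camera)
    # expects the camera .npz file of a NeuS-style DTU scan.
    path = "data/dtu_scan24"                               # hypothetical scene folder
    scene_info = sceneLoadTypeCallbacks["DTU"](path, "cameras_sphere.npz", "cameras_sphere.npz")

    print(len(scene_info.train_cameras), "training cameras")
    print("scene radius:", scene_info.nerf_normalization["radius"])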