-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_dataset.py
140 lines (119 loc) · 5.48 KB
/
prepare_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""
HSR Dataset Preparation
This script converts preprocessed data into the format required by HSR. It processes and
organizes images, masks, depth maps, SMPL parameters and camera poses into a standardized
structure.
Arguments:
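    --data_dir: root directory of the sequence. Inputs are read from its sub-folders
                (images/selected_frames, masks/sam2_human, monocular_cues/metric3d,
                normalized) and the results are written to data_dir/processed.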
Output Structure:
    data_dir/
    └── processed/
        ├── image/                  # input images
        ├── sam_mask/               # human segmentation masks
        ├── depth/                  # monocular depth maps
        ├── normal/                 # monocular normal maps
        ├── cameras.npz             # per-frame projection matrices (K @ w2c)
        ├── cameras_normalize.npz   # camera parameters with dummy scale matrices
        ├── mean_shape.npy          # scale and mean SMPL shape parameters
        ├── poses.npy               # SMPL pose parameters
        ├── normalize_trans.npy     # SMPL translations
        ├── intrinsic.npy           # camera intrinsics (stored as a 4x4 matrix)
        ├── c2ws.npy                # camera-to-world matrices
        └── scene_pcd.ply           # scene point cloud
"""
import argparse
import glob
import pickle as pkl
import re
import shutil
from pathlib import Path
import cv2
import numpy as np
from tqdm import tqdm
def load_normalized_data(input_dir):
    """Load the intrinsics, camera poses and aligned SMPL data from a directory.

    Args:
        input_dir: Directory (``str`` or ``pathlib.Path``) that contains
            ``intrinsic.npy``, ``c2ws.pkl`` and ``aligned_smpl.pkl``.

    Returns:
        A tuple ``(intrinsic, c2ws, aligned_smpl)``: the array stored in
        ``intrinsic.npy`` followed by the unpickled contents of ``c2ws.pkl``
        and ``aligned_smpl.pkl``.

    Raises:
        FileNotFoundError: If any of the three files is missing.
    """
    # Accept plain strings as well as Path objects; the ``/`` joins below only
    # work on Path instances.
    input_dir = Path(input_dir)

    intrinsic = np.load(input_dir / "intrinsic.npy")
    # NOTE: unpickling can execute arbitrary code. Only point this function at
    # files written by your own preprocessing run, never at untrusted data.
    with open(input_dir / "c2ws.pkl", "rb") as f:
        c2ws = pkl.load(f)
    with open(input_dir / "aligned_smpl.pkl", "rb") as f:
        aligned_smpl = pkl.load(f)
    return intrinsic, c2ws, aligned_smpl
def _frame_index(path):
    """Return the integer frame index encoded in the file stem of *path*."""
    return int(Path(path).stem)


def _require_same_count(name, paths, expected):
    """Raise ``ValueError`` unless *paths* holds exactly *expected* entries."""
    if len(paths) != expected:
        raise ValueError(
            f"Expected {expected} {name} files to match the input images, "
            f"but found {len(paths)}"
        )


def main():
    """Convert the preprocessed sequence under ``--data_dir`` to the HSR layout.

    Reads the selected frames, SAM2 human masks and Metric3D depth/normal maps
    from ``data_dir``, together with the intrinsics, camera-to-world matrices
    and aligned SMPL parameters from ``data_dir/normalized``. Everything is
    re-saved under ``data_dir/processed`` with file names of the form
    ``{idx:04d}``, plus the aggregated camera and SMPL arrays.

    Raises:
        FileNotFoundError: If no input image is found.
        ValueError: If the image names are not consecutive indices starting at
            zero, if the number of masks / depth maps / normal maps differs
            from the number of images, or if no camera pose is available.
        RuntimeError: If OpenCV fails to decode an image or a mask.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=Path, required=True)
    args = parser.parse_args()
    data_dir = args.data_dir
    print("Convert the processed data to the format required by HSR")

    img_dir = data_dir / "images" / "selected_frames"
    sam_mask_dir = data_dir / "masks" / "sam2_human"
    depth_dir = data_dir / "monocular_cues" / "metric3d" / "depth"
    normal_dir = data_dir / "monocular_cues" / "metric3d" / "normal"

    save_dir = data_dir / "processed"
    save_dir.mkdir(exist_ok=True)
    for sub_dir in ("image", "sam_mask", "depth", "normal"):
        (save_dir / sub_dir).mkdir(parents=False, exist_ok=True)

    # The dot is escaped and the whole name must match, so only files named
    # "<digits>.<jpg|jpeg|png>" are picked up.
    img_pattern = re.compile(r"(\d+)\.(jpg|jpeg|png)", re.IGNORECASE)
    img_paths = sorted(
        file for file in img_dir.glob("*") if img_pattern.fullmatch(file.name)
    )
    if not img_paths:
        raise FileNotFoundError(f"No input images found in {img_dir}")

    sam_mask_paths = sorted(glob.glob(f"{sam_mask_dir}/*.jpg"))
    depth_paths = sorted(glob.glob(f"{depth_dir}/*.npy"))
    normal_paths = sorted(glob.glob(f"{normal_dir}/*.npy"))

    # Frames are paired with their masks / depth / normals purely by position
    # in the sorted lists, so the lists must have the same length.
    num_frames = len(img_paths)
    _require_same_count("SAM mask", sam_mask_paths, num_frames)
    _require_same_count("depth", depth_paths, num_frames)
    _require_same_count("normal", normal_paths, num_frames)

    intrinsic, c2ws, aligned_smpl = load_normalized_data(data_dir / "normalized")
    if not c2ws:
        raise ValueError("c2ws.pkl does not contain any camera pose")

    # Embed the intrinsics in a 4x4 matrix so they compose with the 4x4
    # world-to-camera matrices below.
    K = np.eye(4, dtype=np.float32)
    K[:3, :3] = intrinsic

    output_trans = []
    output_pose = []
    output_scale = []
    output_shape = []
    output_P = {}
    c2w_list = []

    # Keys of c2ws look like "<index>.<suffix>"; reuse the suffix of the first
    # key to rebuild the key of every frame.
    suffix = next(iter(c2ws)).split(".")[1]

    for idx, img_path in enumerate(tqdm(img_paths)):
        # The SMPL arrays and the output names are indexed by position, which
        # is only valid when the frames are named 0, 1, 2, ... without gaps.
        if _frame_index(img_path) != idx:
            raise ValueError(
                f"Image {img_path} is at position {idx}; frames must be "
                "numbered consecutively starting from 0"
            )

        img = cv2.imread(str(img_path))
        if img is None:
            raise RuntimeError(f"Failed to read image {img_path}")
        sam_mask = cv2.imread(sam_mask_paths[idx])
        if sam_mask is None:
            raise RuntimeError(f"Failed to read mask {sam_mask_paths[idx]}")
        depth = np.load(depth_paths[idx])
        normal = np.load(normal_paths[idx])

        cv2.imwrite(str(save_dir / "image" / f"{idx:04d}.png"), img)
        cv2.imwrite(str(save_dir / "sam_mask" / f"{idx:04d}.png"), sam_mask)
        np.save(save_dir / "depth" / f"{idx:04d}.npy", depth)
        np.save(save_dir / "normal" / f"{idx:04d}.npy", normal)

        c2w = c2ws[f"{idx:04d}.{suffix}"]
        w2c = np.linalg.inv(c2w)
        # Projection matrix: intrinsics applied after world-to-camera.
        P = K @ w2c

        output_trans.append(aligned_smpl["trans"][idx].astype(np.float32))
        output_pose.append(aligned_smpl["pose"][idx])
        output_scale.append(aligned_smpl["scale"][idx])
        output_shape.append(aligned_smpl["shape"][idx])
        output_P[f"cam_{idx}"] = P.astype(np.float32)
        c2w_list.append(c2w)

    mean_shape = np.array(output_shape).mean(axis=0)
    # Use the 10% quantile rather than the mean: we tend to overestimate the
    # scale with naked SMPL, so a smaller value is preferred.
    mean_scale = np.quantile(np.array(output_scale), 0.1, axis=0)
    # The saved vector holds the scale first, followed by the mean shape.
    mean_shape = np.concatenate([mean_scale, mean_shape], axis=0)

    np.save(save_dir / "mean_shape.npy", mean_shape)
    np.save(save_dir / "poses.npy", np.array(output_pose))
    np.save(save_dir / "normalize_trans.npy", np.array(output_trans))
    np.save(save_dir / "intrinsic.npy", K.astype(np.float32))
    np.save(save_dir / "c2ws.npy", np.array(c2w_list, dtype=np.float32))
    np.savez(save_dir / "cameras.npz", **output_P)

    cameras_new = {}
    # output_P was filled in frame order, so enumerating its values yields
    # cam_0, cam_1, ... in sequence.
    for i, world_mat in enumerate(output_P.values()):
        # We have a dummy scale matrix here as our camera is already
        # normalized in the previous step.
        cameras_new[f"scale_mat_{i}"] = np.eye(4, dtype=np.float32)
        cameras_new[f"world_mat_{i}"] = world_mat
    np.savez(save_dir / "cameras_normalize.npz", **cameras_new)

    # Copy the scene point cloud to the processed folder.
    scene_pcd = data_dir / "normalized" / "scene_pcd.ply"
    shutil.copy(scene_pcd, save_dir / "scene_pcd.ply")
    print("All steps finished")


if __name__ == "__main__":
    main()