Implement the camera parameter sampling required by Live3D

NVlabs · Oct 11, 2023 · 55040d2 · 55040d2
1 parent fd33438
commit 55040d2
Showing 1 changed file with 71 additions and 9 deletions.
diff --git a/eg3d/camera_utils.py b/eg3d/camera_utils.py
@@ -123,6 +123,62 @@ def sample(horizontal_mean, vertical_mean, horizontal_stddev=0, vertical_stddev=
         forward_vectors = math_utils.normalize_vecs(-camera_origins)
         return create_cam2world_matrix(forward_vectors, camera_origins)    
 
+
+
+class Live3DCameraPoseSampler:
+    '''
+    Samples cam2world matrices for cameras that look at the origin.
+
+    As in UniformCameraPoseSampler, the horizontal and vertical angles are drawn uniformly from
+    mean +- [horizontal/vertical]_stddev. Radius and roll are instead drawn from normal
+    distributions; the defaults are radius 2.7 +- 0.1 and roll 0 +- 2 degrees.
+
+    Example (16 poses, horizontal angle drawn uniformly from pi/2 +- pi/2 radians):
+    cam2worlds = Live3DCameraPoseSampler.sample(math.pi/2, math.pi/2, horizontal_stddev=math.pi/2, batch_size=16)
+    '''
+    @staticmethod
+    def sample(horizontal_mean, vertical_mean, roll_mean_deg=0, horizontal_stddev=0, vertical_stddev=0, roll_stddev_deg=2, radius_mean=2.7, radius_stddev=0.1, batch_size=1, device='cpu'):
+        '''
+        Returns create_cam2world_matrix(forward_vectors, camera_origins) for batch_size sampled poses.
+        horizontal_*/vertical_* are in radians, roll_* in degrees; all tensors are created on device.
+        '''
+        h = (torch.rand((batch_size, 1), device=device) * 2 - 1) * horizontal_stddev + horizontal_mean
+        v = (torch.rand((batch_size, 1), device=device) * 2 - 1) * vertical_stddev + vertical_mean
+        # Keep v inside (0, pi) so the argument of arccos below stays inside (-1, 1).
+        v = torch.clamp(v, 1e-5, math.pi - 1e-5)
+
+        theta = h
+        v = v / math.pi
+        phi = torch.arccos(1 - 2 * v)
+
+        # Draw ONE radius per camera: a separate draw per axis would push the origin off the (theta, phi) direction.
+        radius = radius_stddev * torch.randn((batch_size, 1), device=device) + radius_mean
+
+        camera_origins = torch.zeros((batch_size, 3), device=device)
+        camera_origins[:, 0:1] = radius * torch.sin(phi) * torch.cos(math.pi - theta)
+        camera_origins[:, 2:3] = radius * torch.sin(phi) * torch.sin(math.pi - theta)
+        camera_origins[:, 1:2] = radius * torch.cos(phi)
+
+        forward_vectors = math_utils.normalize_vecs(-camera_origins)
+
+        # Sample roll from a normal distribution; mean and std deviation are given in degrees.
+        roll = torch.randn((batch_size, 1), device=device) * math.radians(roll_stddev_deg) + math.radians(roll_mean_deg)
+        cos_r, sin_r = torch.cos(roll), torch.sin(roll)
+        zeros, ones = torch.zeros_like(roll), torch.ones_like(roll)
+        roll_matrix = torch.stack([
+            cos_r, -sin_r, zeros,
+            sin_r, cos_r, zeros,
+            zeros, zeros, ones,
+        ], dim=-1).view(batch_size, 3, 3)
+
+        # Rotate each forward vector by its own matrix (assumes normalize_vecs returns (batch_size, 3) - TODO confirm).
+        # A plain matmul of a 2-D tensor with a 3-D tensor would broadcast to (batch_size, batch_size, 3).
+        # NOTE(review): this rotates about the world z-axis, not about the camera's viewing axis - confirm intent.
+        forward_vectors = torch.matmul(forward_vectors.unsqueeze(1), roll_matrix).squeeze(1)
+
+        return create_cam2world_matrix(forward_vectors, camera_origins)
+
+
 def create_cam2world_matrix(forward_vector, origin):
     """
     Takes in the direction the camera is pointing and the camera origin and returns a cam2world matrix.
@@ -145,12 +201,10 @@ def create_cam2world_matrix(forward_vector, origin):
     return cam2world
 
 
-def FOV_to_intrinsics(fov_degrees, device='cpu'):
+def FOV_to_intrinsics(fov_mean = 18.83, fov_stddev = 1, principal_point_mean = 256, principal_point_stddev = 14, device='cpu'):
     """
     Creates a 3x3 camera intrinsics matrix from the camera field of view, specified in degrees.
     Note the intrinsics are returned as normalized by image size, rather than in pixel units.
-    Assumes principal point is at image center.
-    
     the camera intrinsic matrix is a 3x3 matrix, where
         f_x, f_y are the focal length in x and y direction
         x_0, y_0 are principal point offsets
@@ -160,16 +214,24 @@ def FOV_to_intrinsics(fov_degrees, device='cpu'):
     [ 0,            f_y/size_y,   y_0/size_y]
     [ 0,            0,            1         ]
     
-    As a sanity check, after normalization, your principal point should be close to 0.5, 0.5
+  
+    FOV sampling (in degrees):
+        Mean: 18.83
+        Standard Deviation: 1
+    
+    Principal point sampling (before being normalized by image size):
+        Mean: 256
+        Standard Deviation: 14
     """
     # normal distribution,standard deviation 1, centered at 18.83
-    fov_degrees = torch.randn(1, device=device) * 1.0 +  fov_degrees
+    fov_degrees = torch.randn(1, device=device) * fov_stddev + fov_mean
     # Principal point sampling: normal distribution, standard deviation 14, centered at 256
-    x_0 = (torch.randn(1, device=device) * 14.0 + 256) / 512.0
-    y_0 = (torch.randn(1, device=device) * 14.0 + 256) / 512.0
+    principal_point = (torch.randn(2, device=device) * principal_point_stddev + principal_point_mean) / 512.0
 
     focal_length = float(1 / (math.tan(fov_degrees * 3.14159 / 360) * 1.414))
-    # intrinsics = torch.tensor([[focal_length, 0, 0.5], [0, focal_length, 0.5], [0, 0, 1]], device=device)
-    intrinsics = torch.tensor([[focal_length, 0, x_0], [0, focal_length, y_0], [0, 0, 1]], device=device)
+    intrinsics = torch.tensor([
+         [focal_length, 0, principal_point[0]], 
+         [0, focal_length, principal_point[1]], 
+         [0, 0, 1]], device=device)
 
     return intrinsics