Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Automatic circular camera path rendering #3314

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Prev Previous commit
Next Next commit
add user input values
ginazhouhuiwu committed Jul 17, 2024
commit 8417be67d35728733a158f12dc7079edab31c20b
49 changes: 34 additions & 15 deletions nerfstudio/viewer/render_panel.py
Original file line number Diff line number Diff line change
@@ -753,15 +753,15 @@ def _(event: viser.ScenePointerEvent) -> None:
origin = torch.tensor(event.ray_origin).view(1, 3)
direction = torch.tensor(event.ray_direction).view(1, 3)

# get intersection
# Get intersection
bundle = RayBundle(
origin,
direction,
torch.tensor(0.001).view(1, 1),
origins=origin,
directions=direction,
pixel_area=torch.tensor(0.001).view(1, 1),
camera_indices=torch.tensor(0).view(1, 1),
nears=torch.tensor(0.05).view(1, 1),
fars=torch.tensor(100).view(1, 1),
camera_indices=torch.tensor(0).view(1, 1),
).to(torch.device)
).to("cuda")

# Get the distance/depth to the intersection --> calculate 3D position of the click
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good start! But this part should use the more general get_outputs_for_camera() function (a lot of the sampling and scaling logic is method-specific to nerfacto). As long as the method outputs a 'depth' value, it should work with this ray deprojection approach.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This way, you would render a depth image from the viewer camera and deproject the click point with the intrinsics matrix + rendered depth, meaning it doesn't matter what the rendering backend is.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing to note is that the 'depth' in splatfacto is actually z-depth and not ray-depth, so the math would need to be different for the two methods

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, that makes sense! Thanks for the feedback — appreciate it!

ray_samples, _, _ = viewer_model.proposal_sampler(bundle, density_fns=viewer_model.density_fns)
@@ -774,7 +774,7 @@ def _(event: viser.ScenePointerEvent) -> None:
distance = depth[0, 0].detach().cpu().numpy()

nonlocal click_position
click_position = np.array(origin + direction * distance) * VISER_NERFSTUDIO_SCALE_RATIO
click_position = np.array(origin + direction * distance).reshape(3,) * VISER_NERFSTUDIO_SCALE_RATIO

server.scene.add_icosphere(
f"/render_center/sphere",
@@ -788,7 +788,25 @@ def _(event: viser.ScenePointerEvent) -> None:
@event.client.scene.on_pointer_callback_removed
def _():
select_center_button.disabled = False


num_cameras_handle = server.gui.add_number(
label="Number of Cameras",
initial_value=10,
hint="Total number of cameras generated in path, placed equidistant from neighboring ones.",
)

radius_handle = server.gui.add_number(
label="Radius",
initial_value=2,
hint="Radius of circular camera path.",
)

camera_height_handle = server.gui.add_number(
label="Height",
initial_value=2,
hint="Height of cameras with respect to chosen origin.",
)

circular_camera_path_button = server.gui.add_button(
"Generate Circular Camera Path",
icon=viser.Icon.CAMERA,
@@ -797,12 +815,12 @@ def _():

@circular_camera_path_button.on_click
def _(event: viser.GuiEvent) -> None:
nonlocal click_position
num_cameras = 10
radius = 5
z_camera = 5
nonlocal click_position, num_cameras_handle, radius_handle, camera_height_handle
num_cameras = num_cameras_handle.value
radius = radius_handle.value
camera_height = camera_height_handle.value

camera_coords = []
fov = event.client.camera.fov
for i in range(num_cameras):
camera_coords.append((radius * np.cos(2 * np.pi * i / num_cameras), radius * np.sin(2 * np.pi * i/ num_cameras)))

@@ -821,7 +839,7 @@ def wxyz_helper(camera_position: np.ndarray) -> np.ndarray:
camera_up = np.cross(camera_right, camera_direction)

R = np.array([camera_right, camera_up, -camera_direction]).T

w = np.sqrt(1 + R[0, 0] + R[1, 1] + R[2, 2]) / 2
x = (R[2, 1] - R[1, 2]) / (4 * w)
y = (R[0, 2] - R[2, 0]) / (4 * w)
@@ -830,8 +848,9 @@ def wxyz_helper(camera_position: np.ndarray) -> np.ndarray:
else:
return np.array([1.0, 0.0, 0.0, 0.0])

fov = event.client.camera.fov
for i, item in enumerate(camera_coords):
position = click_position + np.array([item[0], item[1], z_camera])
position = click_position + np.array([item[0], item[1], camera_height])
camera_path.add_camera(
keyframe=Keyframe(
position=position,