From 8fca1580503863ee0b4a1931df6373541498a521 Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Tue, 16 Jul 2024 19:38:18 -0700 Subject: [PATCH 01/12] init auto circular camera render --- nerfstudio/viewer/render_panel.py | 93 +++++++++++++++++++++++++++++++ nerfstudio/viewer/viewer.py | 1 + 2 files changed, 94 insertions(+) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 10d263f8c2..f2a78d4536 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -588,6 +588,7 @@ def _(_) -> None: initial_value="Perspective", hint="Camera model to render with. This is applied to all keyframes.", ) + add_button = server.gui.add_button( "Add Keyframe", icon=viser.Icon.PLUS, @@ -726,6 +727,98 @@ def _(_: viser.GuiEvent) -> None: def _(_) -> None: camera_path.show_spline = show_spline_checkbox.value camera_path.update_spline() + + auto_camera_folder = server.gui.add_folder("Automatic Camera Path") + with auto_camera_folder: + origin_point = np.array([-1.01697124, 0.86319059, -4.2836189]) + select_center_button = server.gui.add_button( + "Select Center", + icon=viser.Icon.CROSSHAIR, + hint="Choose center point to generate camera path around.", + ) + + @select_center_button.on_click + def _(event: viser.GuiEvent) -> None: + select_center_button.disabled = True + + @event.client.scene.on_pointer_event(event_type="click") + def _(event: viser.ScenePointerEvent) -> None: + # TODO: currently buggy and selects the client's keyframe as the origin + # rather than the actual clicking location, need intersector + nonlocal origin_point + origin_point = np.array(event.ray_origin) + print(origin_point) + server.scene.add_icosphere( + f"/render_center/sphere", + radius=0.1, + color=(200, 10, 30), + position=np.array(event.ray_origin) + ) + + event.client.scene.remove_pointer_callback() + + @event.client.scene.on_pointer_callback_removed + def _(): + select_center_button.disabled = False + + circular_camera_path_button = server.gui.add_button( + "Generate Circular Camera Path", + icon=viser.Icon.CAMERA, + hint="Automatically generate a circular camera path around selected point.", + ) + + @circular_camera_path_button.on_click + def _(event: viser.GuiEvent) -> None: + nonlocal origin_point + num_cameras = 10 + radius = 1 + z_camera = 2 + camera_coords = [] + fov = event.client.camera.fov + for i in range(num_cameras): + camera_coords.append((radius * np.cos(2 * np.pi * i / num_cameras), radius * np.sin(2 * np.pi * i/ num_cameras))) + + def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: + # Calculates the camera direction from position to origin_point + direction = camera_position - origin_point + direction = direction / np.linalg.norm(direction) + + global_up = np.array([0.0, 0.0, 1.0]) + + right = np.cross(global_up, direction) + right_norm = np.linalg.norm(right) + if right_norm > 0: + right = right / right_norm + + up = np.cross(right, direction) + + R = np.array([right, up, -direction]).T + + w = np.sqrt(1 + R[0, 0] + R[1, 1] + R[2, 2]) / 2 + x = (R[2, 1] - R[1, 2]) / (4 * w) + y = (R[0, 2] - R[2, 0]) / (4 * w) + z = (R[1, 0] - R[0, 1]) / (4 * w) + return np.array([w, x, y, z]) + else: + return np.array([1.0, 0.0, 0.0, 0.0]) + + for i, item in enumerate(camera_coords): + position = origin_point + np.array([item[0], item[1], z_camera]) + camera_path.add_camera( + keyframe=Keyframe( + position=position, + wxyz=wxyz_helper(position), + override_fov_enabled=False, + override_fov_rad=fov, + override_time_enabled=False, + override_time_val=0.0, + 
aspect=resolution.value[0] / resolution.value[1], + override_transition_enabled=False, + override_transition_sec=None, + ) + ) + duration_number.value = camera_path.compute_duration() + camera_path.update_spline() playback_folder = server.gui.add_folder("Playback") with playback_folder: diff --git a/nerfstudio/viewer/viewer.py b/nerfstudio/viewer/viewer.py index bb90ea1602..5241208cce 100644 --- a/nerfstudio/viewer/viewer.py +++ b/nerfstudio/viewer/viewer.py @@ -200,6 +200,7 @@ def __init__( default_composite_depth=self.config.default_composite_depth, ) config_path = self.log_filename.parents[0] / "config.yml" + with tabs.add_tab("Render", viser.Icon.CAMERA): self.render_tab_state = populate_render_tab( self.viser_server, config_path, self.datapath, self.control_panel From 159864ba1ba319b3f5dc0160f910db199b644090 Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 17 Jul 2024 11:14:44 -0700 Subject: [PATCH 02/12] notation changes --- nerfstudio/viewer/render_panel.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index f2a78d4536..96132d60ab 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -730,7 +730,7 @@ def _(_) -> None: auto_camera_folder = server.gui.add_folder("Automatic Camera Path") with auto_camera_folder: - origin_point = np.array([-1.01697124, 0.86319059, -4.2836189]) + origin_point = np.array([1.0, 0.0, 0.0]) select_center_button = server.gui.add_button( "Select Center", icon=viser.Icon.CROSSHAIR, @@ -771,8 +771,8 @@ def _(): def _(event: viser.GuiEvent) -> None: nonlocal origin_point num_cameras = 10 - radius = 1 - z_camera = 2 + radius = 5 + z_camera = 5 camera_coords = [] fov = event.client.camera.fov for i in range(num_cameras): @@ -780,20 +780,20 @@ def _(event: viser.GuiEvent) -> None: def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: # Calculates the camera direction from position to origin_point - direction = camera_position - origin_point - direction = direction / np.linalg.norm(direction) + camera_direction = camera_position - origin_point + camera_direction = camera_direction / np.linalg.norm(camera_direction) global_up = np.array([0.0, 0.0, 1.0]) - right = np.cross(global_up, direction) - right_norm = np.linalg.norm(right) - if right_norm > 0: - right = right / right_norm + camera_right = np.cross(camera_direction, global_up) + camera_right_norm = np.linalg.norm(camera_right) + if camera_right_norm > 0: + camera_right = camera_right / camera_right_norm - up = np.cross(right, direction) - - R = np.array([right, up, -direction]).T + camera_up = np.cross(camera_right, camera_direction) + R = np.array([camera_right, camera_up, -camera_direction]).T + w = np.sqrt(1 + R[0, 0] + R[1, 1] + R[2, 2]) / 2 x = (R[2, 1] - R[1, 2]) / (4 * w) y = (R[0, 2] - R[2, 0]) / (4 * w) @@ -818,7 +818,7 @@ def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: ) ) duration_number.value = camera_path.compute_duration() - camera_path.update_spline() + #camera_path.update_spline() playback_folder = server.gui.add_folder("Playback") with playback_folder: From ee4125172b08cc601b640e0e473d49ffbdb8b7aa Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 17 Jul 2024 15:08:58 -0700 Subject: [PATCH 03/12] wip in debugging click position --- nerfstudio/viewer/render_panel.py | 53 ++++++++++++++++++++++++------- nerfstudio/viewer/viewer.py | 2 +- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git 
a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 96132d60ab..bd2a90a144 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -30,6 +30,7 @@ import viser.transforms as tf from scipy import interpolate +from nerfstudio.models.base_model import Model from nerfstudio.viewer.control_panel import ControlPanel @@ -520,6 +521,7 @@ def populate_render_tab( server: viser.ViserServer, config_path: Path, datapath: Path, + viewer_model: Model, control_panel: Optional[ControlPanel] = None, ) -> RenderTabState: from nerfstudio.viewer.viewer import VISER_NERFSTUDIO_SCALE_RATIO @@ -730,7 +732,7 @@ def _(_) -> None: auto_camera_folder = server.gui.add_folder("Automatic Camera Path") with auto_camera_folder: - origin_point = np.array([1.0, 0.0, 0.0]) + click_position = np.array([1.0, 0.0, 0.0]) select_center_button = server.gui.add_button( "Select Center", icon=viser.Icon.CROSSHAIR, @@ -743,16 +745,42 @@ def _(event: viser.GuiEvent) -> None: @event.client.scene.on_pointer_event(event_type="click") def _(event: viser.ScenePointerEvent) -> None: - # TODO: currently buggy and selects the client's keyframe as the origin - # rather than the actual clicking location, need intersector - nonlocal origin_point - origin_point = np.array(event.ray_origin) - print(origin_point) + import torch + from nerfstudio.cameras.rays import RayBundle + from nerfstudio.field_components.field_heads import FieldHeadNames + from nerfstudio.model_components.losses import scale_gradients_by_distance_squared + + origin = torch.tensor(event.ray_origin).view(1, 3) + direction = torch.tensor(event.ray_direction).view(1, 3) + + # get intersection + bundle = RayBundle( + origin, + direction, + torch.tensor(0.001).view(1, 1), + nears=torch.tensor(0.05).view(1, 1), + fars=torch.tensor(100).view(1, 1), + camera_indices=torch.tensor(0).view(1, 1), + ).to(torch.device) + + # Get the distance/depth to the intersection --> calculate 3D position of the click + ray_samples, _, _ = viewer_model.proposal_sampler(bundle, density_fns=viewer_model.density_fns) + field_outputs = viewer_model.field.forward(ray_samples, compute_normals=viewer_model.config.predict_normals) + if viewer_model.config.use_gradient_scaling: + field_outputs = scale_gradients_by_distance_squared(field_outputs, ray_samples) + weights = ray_samples.get_weights(field_outputs[FieldHeadNames.DENSITY]) + with torch.no_grad(): + depth = viewer_model.renderer_depth(weights=weights, ray_samples=ray_samples) + distance = depth[0, 0].detach().cpu().numpy() + + nonlocal click_position + click_position = np.array(origin + direction * distance) * VISER_NERFSTUDIO_SCALE_RATIO + server.scene.add_icosphere( f"/render_center/sphere", radius=0.1, color=(200, 10, 30), - position=np.array(event.ray_origin) + position=click_position, ) event.client.scene.remove_pointer_callback() @@ -769,7 +797,7 @@ def _(): @circular_camera_path_button.on_click def _(event: viser.GuiEvent) -> None: - nonlocal origin_point + nonlocal click_position num_cameras = 10 radius = 5 z_camera = 5 @@ -779,8 +807,8 @@ def _(event: viser.GuiEvent) -> None: camera_coords.append((radius * np.cos(2 * np.pi * i / num_cameras), radius * np.sin(2 * np.pi * i/ num_cameras))) def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: - # Calculates the camera direction from position to origin_point - camera_direction = camera_position - origin_point + # Calculates the camera direction from position to click_position + camera_direction = camera_position - click_position 
camera_direction = camera_direction / np.linalg.norm(camera_direction) global_up = np.array([0.0, 0.0, 1.0]) @@ -803,7 +831,7 @@ def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: return np.array([1.0, 0.0, 0.0, 0.0]) for i, item in enumerate(camera_coords): - position = origin_point + np.array([item[0], item[1], z_camera]) + position = click_position + np.array([item[0], item[1], z_camera]) camera_path.add_camera( keyframe=Keyframe( position=position, @@ -818,7 +846,7 @@ def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: ) ) duration_number.value = camera_path.compute_duration() - #camera_path.update_spline() + camera_path.update_spline() playback_folder = server.gui.add_folder("Playback") with playback_folder: @@ -1271,6 +1299,7 @@ def _(_) -> None: server=viser.ViserServer(), config_path=Path("."), datapath=Path("."), + viewer_model=Model, ) while True: time.sleep(10.0) diff --git a/nerfstudio/viewer/viewer.py b/nerfstudio/viewer/viewer.py index 5241208cce..c854456a5c 100644 --- a/nerfstudio/viewer/viewer.py +++ b/nerfstudio/viewer/viewer.py @@ -203,7 +203,7 @@ def __init__( with tabs.add_tab("Render", viser.Icon.CAMERA): self.render_tab_state = populate_render_tab( - self.viser_server, config_path, self.datapath, self.control_panel + self.viser_server, config_path, self.datapath, self.pipeline.model, self.control_panel ) with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT): From 8417be67d35728733a158f12dc7079edab31c20b Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 17 Jul 2024 15:51:32 -0700 Subject: [PATCH 04/12] add user input values --- nerfstudio/viewer/render_panel.py | 49 +++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index bd2a90a144..3a6797fb66 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -753,15 +753,15 @@ def _(event: viser.ScenePointerEvent) -> None: origin = torch.tensor(event.ray_origin).view(1, 3) direction = torch.tensor(event.ray_direction).view(1, 3) - # get intersection + # Get intersection bundle = RayBundle( - origin, - direction, - torch.tensor(0.001).view(1, 1), + origins=origin, + directions=direction, + pixel_area=torch.tensor(0.001).view(1, 1), + camera_indices=torch.tensor(0).view(1, 1), nears=torch.tensor(0.05).view(1, 1), fars=torch.tensor(100).view(1, 1), - camera_indices=torch.tensor(0).view(1, 1), - ).to(torch.device) + ).to("cuda") # Get the distance/depth to the intersection --> calculate 3D position of the click ray_samples, _, _ = viewer_model.proposal_sampler(bundle, density_fns=viewer_model.density_fns) @@ -774,7 +774,7 @@ def _(event: viser.ScenePointerEvent) -> None: distance = depth[0, 0].detach().cpu().numpy() nonlocal click_position - click_position = np.array(origin + direction * distance) * VISER_NERFSTUDIO_SCALE_RATIO + click_position = np.array(origin + direction * distance).reshape(3,) * VISER_NERFSTUDIO_SCALE_RATIO server.scene.add_icosphere( f"/render_center/sphere", @@ -788,7 +788,25 @@ def _(event: viser.ScenePointerEvent) -> None: @event.client.scene.on_pointer_callback_removed def _(): select_center_button.disabled = False - + + num_cameras_handle = server.gui.add_number( + label="Number of Cameras", + initial_value=10, + hint="Total number of cameras generated in path, placed equidistant from neighboring ones.", + ) + + radius_handle = server.gui.add_number( + label="Radius", + initial_value=2, + hint="Radius of circular camera path.", + ) + + 
camera_height_handle = server.gui.add_number( + label="Height", + initial_value=2, + hint="Height of cameras with respect to chosen origin.", + ) + circular_camera_path_button = server.gui.add_button( "Generate Circular Camera Path", icon=viser.Icon.CAMERA, @@ -797,12 +815,12 @@ def _(): @circular_camera_path_button.on_click def _(event: viser.GuiEvent) -> None: - nonlocal click_position - num_cameras = 10 - radius = 5 - z_camera = 5 + nonlocal click_position, num_cameras_handle, radius_handle, camera_height_handle + num_cameras = num_cameras_handle.value + radius = radius_handle.value + camera_height = camera_height_handle.value + camera_coords = [] - fov = event.client.camera.fov for i in range(num_cameras): camera_coords.append((radius * np.cos(2 * np.pi * i / num_cameras), radius * np.sin(2 * np.pi * i/ num_cameras))) @@ -821,7 +839,7 @@ def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: camera_up = np.cross(camera_right, camera_direction) R = np.array([camera_right, camera_up, -camera_direction]).T - + w = np.sqrt(1 + R[0, 0] + R[1, 1] + R[2, 2]) / 2 x = (R[2, 1] - R[1, 2]) / (4 * w) y = (R[0, 2] - R[2, 0]) / (4 * w) @@ -830,8 +848,9 @@ def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: else: return np.array([1.0, 0.0, 0.0, 0.0]) + fov = event.client.camera.fov for i, item in enumerate(camera_coords): - position = click_position + np.array([item[0], item[1], z_camera]) + position = click_position + np.array([item[0], item[1], camera_height]) camera_path.add_camera( keyframe=Keyframe( position=position, From afb547caa2abd89340ce320ad9a3f9abba6a158b Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 17 Jul 2024 16:53:29 -0700 Subject: [PATCH 05/12] nits --- nerfstudio/viewer/render_panel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 3a6797fb66..bd90bfd871 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -745,6 +745,7 @@ def _(event: viser.GuiEvent) -> None: @event.client.scene.on_pointer_event(event_type="click") def _(event: viser.ScenePointerEvent) -> None: + # Code mostly borrowed from garfield.studio! 
import torch from nerfstudio.cameras.rays import RayBundle from nerfstudio.field_components.field_heads import FieldHeadNames @@ -774,10 +775,10 @@ def _(event: viser.ScenePointerEvent) -> None: distance = depth[0, 0].detach().cpu().numpy() nonlocal click_position - click_position = np.array(origin + direction * distance).reshape(3,) * VISER_NERFSTUDIO_SCALE_RATIO + click_position = np.array(origin + direction * distance).reshape(3,) server.scene.add_icosphere( - f"/render_center/sphere", + f"/render_center_pos", radius=0.1, color=(200, 10, 30), position=click_position, From 8780664e3a63f2de1bf1bd5810beb68d6fc13a6b Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 21 Aug 2024 17:20:24 -0700 Subject: [PATCH 06/12] init ray shooting object avoidance --- nerfstudio/viewer/render_panel.py | 348 ++++++++++++++++++------------ 1 file changed, 210 insertions(+), 138 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index bd90bfd871..5ae5d01da0 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -729,144 +729,6 @@ def _(_: viser.GuiEvent) -> None: def _(_) -> None: camera_path.show_spline = show_spline_checkbox.value camera_path.update_spline() - - auto_camera_folder = server.gui.add_folder("Automatic Camera Path") - with auto_camera_folder: - click_position = np.array([1.0, 0.0, 0.0]) - select_center_button = server.gui.add_button( - "Select Center", - icon=viser.Icon.CROSSHAIR, - hint="Choose center point to generate camera path around.", - ) - - @select_center_button.on_click - def _(event: viser.GuiEvent) -> None: - select_center_button.disabled = True - - @event.client.scene.on_pointer_event(event_type="click") - def _(event: viser.ScenePointerEvent) -> None: - # Code mostly borrowed from garfield.studio! 
- import torch - from nerfstudio.cameras.rays import RayBundle - from nerfstudio.field_components.field_heads import FieldHeadNames - from nerfstudio.model_components.losses import scale_gradients_by_distance_squared - - origin = torch.tensor(event.ray_origin).view(1, 3) - direction = torch.tensor(event.ray_direction).view(1, 3) - - # Get intersection - bundle = RayBundle( - origins=origin, - directions=direction, - pixel_area=torch.tensor(0.001).view(1, 1), - camera_indices=torch.tensor(0).view(1, 1), - nears=torch.tensor(0.05).view(1, 1), - fars=torch.tensor(100).view(1, 1), - ).to("cuda") - - # Get the distance/depth to the intersection --> calculate 3D position of the click - ray_samples, _, _ = viewer_model.proposal_sampler(bundle, density_fns=viewer_model.density_fns) - field_outputs = viewer_model.field.forward(ray_samples, compute_normals=viewer_model.config.predict_normals) - if viewer_model.config.use_gradient_scaling: - field_outputs = scale_gradients_by_distance_squared(field_outputs, ray_samples) - weights = ray_samples.get_weights(field_outputs[FieldHeadNames.DENSITY]) - with torch.no_grad(): - depth = viewer_model.renderer_depth(weights=weights, ray_samples=ray_samples) - distance = depth[0, 0].detach().cpu().numpy() - - nonlocal click_position - click_position = np.array(origin + direction * distance).reshape(3,) - - server.scene.add_icosphere( - f"/render_center_pos", - radius=0.1, - color=(200, 10, 30), - position=click_position, - ) - - event.client.scene.remove_pointer_callback() - - @event.client.scene.on_pointer_callback_removed - def _(): - select_center_button.disabled = False - - num_cameras_handle = server.gui.add_number( - label="Number of Cameras", - initial_value=10, - hint="Total number of cameras generated in path, placed equidistant from neighboring ones.", - ) - - radius_handle = server.gui.add_number( - label="Radius", - initial_value=2, - hint="Radius of circular camera path.", - ) - - camera_height_handle = server.gui.add_number( - label="Height", - initial_value=2, - hint="Height of cameras with respect to chosen origin.", - ) - - circular_camera_path_button = server.gui.add_button( - "Generate Circular Camera Path", - icon=viser.Icon.CAMERA, - hint="Automatically generate a circular camera path around selected point.", - ) - - @circular_camera_path_button.on_click - def _(event: viser.GuiEvent) -> None: - nonlocal click_position, num_cameras_handle, radius_handle, camera_height_handle - num_cameras = num_cameras_handle.value - radius = radius_handle.value - camera_height = camera_height_handle.value - - camera_coords = [] - for i in range(num_cameras): - camera_coords.append((radius * np.cos(2 * np.pi * i / num_cameras), radius * np.sin(2 * np.pi * i/ num_cameras))) - - def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: - # Calculates the camera direction from position to click_position - camera_direction = camera_position - click_position - camera_direction = camera_direction / np.linalg.norm(camera_direction) - - global_up = np.array([0.0, 0.0, 1.0]) - - camera_right = np.cross(camera_direction, global_up) - camera_right_norm = np.linalg.norm(camera_right) - if camera_right_norm > 0: - camera_right = camera_right / camera_right_norm - - camera_up = np.cross(camera_right, camera_direction) - - R = np.array([camera_right, camera_up, -camera_direction]).T - - w = np.sqrt(1 + R[0, 0] + R[1, 1] + R[2, 2]) / 2 - x = (R[2, 1] - R[1, 2]) / (4 * w) - y = (R[0, 2] - R[2, 0]) / (4 * w) - z = (R[1, 0] - R[0, 1]) / (4 * w) - return np.array([w, x, y, z]) 
- else: - return np.array([1.0, 0.0, 0.0, 0.0]) - - fov = event.client.camera.fov - for i, item in enumerate(camera_coords): - position = click_position + np.array([item[0], item[1], camera_height]) - camera_path.add_camera( - keyframe=Keyframe( - position=position, - wxyz=wxyz_helper(position), - override_fov_enabled=False, - override_fov_rad=fov, - override_time_enabled=False, - override_time_val=0.0, - aspect=resolution.value[0] / resolution.value[1], - override_transition_enabled=False, - override_transition_sec=None, - ) - ) - duration_number.value = camera_path.compute_duration() - camera_path.update_spline() playback_folder = server.gui.add_folder("Playback") with playback_folder: @@ -1302,6 +1164,216 @@ def _(event: viser.GuiEvent) -> None: @close_button.on_click def _(_) -> None: modal.close() + + auto_camera_folder = server.gui.add_folder("Automatic Camera Path") + with auto_camera_folder: + click_position = np.array([0.0, 0.0, -5.0]) + select_center_button = server.gui.add_button( + "Select Center", + icon=viser.Icon.CROSSHAIR, + hint="Choose center point to generate camera path around.", + ) + + @select_center_button.on_click + def _(event: viser.GuiEvent) -> None: + select_center_button.disabled = True + + @event.client.scene.on_pointer_event(event_type="click") + def _(event: viser.ScenePointerEvent) -> None: + # Code mostly borrowed from garfield.studio! + import torch + from nerfstudio.cameras.rays import RayBundle + from nerfstudio.field_components.field_heads import FieldHeadNames + from nerfstudio.model_components.losses import scale_gradients_by_distance_squared + + origin = torch.tensor(event.ray_origin).view(1, 3) + direction = torch.tensor(event.ray_direction).view(1, 3) + + # Get intersection + bundle = RayBundle( + origins=origin, + directions=direction, + pixel_area=torch.tensor(0.001).view(1, 1), + camera_indices=torch.tensor(0).view(1, 1), + nears=torch.tensor(0.05).view(1, 1), + fars=torch.tensor(100).view(1, 1), + ).to("cuda") + + # Get the distance/depth to the intersection --> calculate 3D position of the click + ray_samples, _, _ = viewer_model.proposal_sampler(bundle, density_fns=viewer_model.density_fns) + field_outputs = viewer_model.field.forward(ray_samples, compute_normals=viewer_model.config.predict_normals) + if viewer_model.config.use_gradient_scaling: + field_outputs = scale_gradients_by_distance_squared(field_outputs, ray_samples) + weights = ray_samples.get_weights(field_outputs[FieldHeadNames.DENSITY]) + with torch.no_grad(): + depth = viewer_model.renderer_depth(weights=weights, ray_samples=ray_samples) + distance = depth[0, 0].detach().cpu().numpy() + + nonlocal click_position + click_position = np.array(origin + direction * distance).reshape(3,) + + server.scene.add_icosphere( + f"/render_center_pos", + radius=0.1, + color=(200, 10, 30), + position=click_position, + ) + + event.client.scene.remove_pointer_callback() + + @event.client.scene.on_pointer_callback_removed + def _(): + select_center_button.disabled = False + + num_cameras_handle = server.gui.add_number( + label="Number of Cameras", + initial_value=3, + hint="Total number of cameras generated in path, placed equidistant from neighboring ones.", + ) + + radius_handle = server.gui.add_number( + label="Radius", + initial_value=4, + hint="Radius of circular camera path.", + ) + + camera_height_handle = server.gui.add_number( + label="Height", + initial_value=2, + hint="Height of cameras with respect to chosen origin.", + ) + + circular_camera_path_button = server.gui.add_button( + 
"Generate Circular Camera Path", + icon=viser.Icon.CAMERA, + hint="Automatically generate a circular camera path around selected point.", + ) + + def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: + # Calculates the camera direction from position to click_position + camera_direction = camera_position - click_position + camera_direction = camera_direction / np.linalg.norm(camera_direction) + + global_up = np.array([0.0, 0.0, 1.0]) + + camera_right = np.cross(camera_direction, global_up) + camera_right_norm = np.linalg.norm(camera_right) + if camera_right_norm > 0: + camera_right = camera_right / camera_right_norm + + camera_up = np.cross(camera_right, camera_direction) + + R = np.array([-camera_right, -camera_up, -camera_direction]).T + + w = np.sqrt(1 + R[0, 0] + R[1, 1] + R[2, 2]) / 2 + x = (R[2, 1] - R[1, 2]) / (4 * w) + y = (R[0, 2] - R[2, 0]) / (4 * w) + z = (R[1, 0] - R[0, 1]) / (4 * w) + return np.array([w, x, y, z]) + else: + return np.array([1.0, 0.0, 0.0, 0.0]) + + camera_coords = [] + @circular_camera_path_button.on_click + def _(event: viser.GuiEvent) -> None: + nonlocal click_position, num_cameras_handle, radius_handle, camera_height_handle, camera_coords + num_cameras = num_cameras_handle.value + radius = radius_handle.value + camera_height = camera_height_handle.value + + camera_coords = [] + for i in range(num_cameras): + camera_coords.append((radius * np.cos(2 * np.pi * i / num_cameras), radius * np.sin(2 * np.pi * i/ num_cameras))) + + fov = event.client.camera.fov + for i, item in enumerate(camera_coords): + position = click_position + np.array([item[0], item[1], camera_height]) + camera_path.add_camera( + keyframe=Keyframe( + position=position, + wxyz=wxyz_helper(position), + override_fov_enabled=False, + override_fov_rad=fov, + override_time_enabled=False, + override_time_val=0.0, + aspect=resolution.value[0] / resolution.value[1], + override_transition_enabled=False, + override_transition_sec=None, + ), + keyframe_index = i, + ) + duration_number.value = camera_path.compute_duration() + camera_path.update_spline() + + optimize_button = server.gui.add_button( + "Optimize Camera Path", + icon=viser.Icon.CAMERA, + hint="Optimizes camera path for object avoidance iteratively.", + ) + + @optimize_button.on_click + def _(event: viser.GuiEvent) -> None: + import torch + from nerfstudio.cameras.rays import RayBundle + from nerfstudio.field_components.field_heads import FieldHeadNames + from nerfstudio.model_components.losses import scale_gradients_by_distance_squared + + directions = [[1, 0, 0], + [-1, 0, 0], + [0, 1, 0], + [0, -1, 0], + [0, 0, 1], + [0, 0, -1]] + + nonlocal click_position, num_cameras_handle, radius_handle, camera_height_handle, camera_coords + camera_height = camera_height_handle.value + fov = event.client.camera.fov + + for i, item in enumerate(camera_coords): + raylen = 2.0 + position = click_position + np.array([item[0], item[1], camera_height]) + + origins = torch.tensor(np.tile(position, (6, 1))) + pixel_area = torch.ones_like(origins[..., 0:1]) + camera_indices = torch.zeros_like(origins[..., 0:1]).int() + nears = torch.zeros_like(origins[..., 0:1]) + fars = torch.ones_like(origins[..., 0:1]) * raylen + directions_norm = torch.ones_like(origins[..., 0:1]) + viewer_model.training = False + + bundle = RayBundle( + origins=origins, + directions=torch.tensor(directions), + pixel_area=pixel_area, + camera_indices=camera_indices, + nears=nears, + fars=fars, + metadata={"directions_norm": directions_norm}, + ).to('cuda') + + outputs = 
viewer_model.get_outputs(bundle) + + distances = outputs["expected_depth"].detach().cpu().numpy() + if min(distances) < 0.4: + position = position - directions[np.argmin(distances)] * 1 + camera_path.add_camera( + keyframe=Keyframe( + position=position, + wxyz=wxyz_helper(position), + override_fov_enabled=False, + override_fov_rad=fov, + override_time_enabled=False, + override_time_val=0.0, + aspect=resolution.value[0] / resolution.value[1], + override_transition_enabled=False, + override_transition_sec=None, + ), + keyframe_index = i, + ) + duration_number.value = camera_path.compute_duration() + camera_path.update_spline() + + camera_coords[i] = position if control_panel is not None: camera_path = CameraPath(server, duration_number, control_panel._time_enabled) From c42adc89b3453ea377a49666b14e080869104c73 Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 21 Aug 2024 17:40:08 -0700 Subject: [PATCH 07/12] fix camera position bug --- nerfstudio/viewer/render_panel.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 5ae5d01da0..63cc711e69 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -1283,11 +1283,13 @@ def _(event: viser.GuiEvent) -> None: camera_coords = [] for i in range(num_cameras): - camera_coords.append((radius * np.cos(2 * np.pi * i / num_cameras), radius * np.sin(2 * np.pi * i/ num_cameras))) + camera_coords.append(click_position + + np.array([radius * np.cos(2 * np.pi * i / num_cameras), + radius * np.sin(2 * np.pi * i/ num_cameras), + camera_height])) fov = event.client.camera.fov - for i, item in enumerate(camera_coords): - position = click_position + np.array([item[0], item[1], camera_height]) + for i, position in enumerate(camera_coords): camera_path.add_camera( keyframe=Keyframe( position=position, @@ -1329,10 +1331,8 @@ def _(event: viser.GuiEvent) -> None: camera_height = camera_height_handle.value fov = event.client.camera.fov - for i, item in enumerate(camera_coords): + for i, position in enumerate(camera_coords): raylen = 2.0 - position = click_position + np.array([item[0], item[1], camera_height]) - origins = torch.tensor(np.tile(position, (6, 1))) pixel_area = torch.ones_like(origins[..., 0:1]) camera_indices = torch.zeros_like(origins[..., 0:1]).int() From 92039c7e6b14b6739dfdbb3939866466a6e25565 Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 21 Aug 2024 17:55:49 -0700 Subject: [PATCH 08/12] clean --- nerfstudio/viewer/render_panel.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 63cc711e69..1877b6709f 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -1320,16 +1320,14 @@ def _(event: viser.GuiEvent) -> None: from nerfstudio.field_components.field_heads import FieldHeadNames from nerfstudio.model_components.losses import scale_gradients_by_distance_squared + nonlocal camera_coords + directions = [[1, 0, 0], [-1, 0, 0], [0, 1, 0], [0, -1, 0], [0, 0, 1], [0, 0, -1]] - - nonlocal click_position, num_cameras_handle, radius_handle, camera_height_handle, camera_coords - camera_height = camera_height_handle.value - fov = event.client.camera.fov for i, position in enumerate(camera_coords): raylen = 2.0 @@ -1352,16 +1350,18 @@ def _(event: viser.GuiEvent) -> None: ).to('cuda') outputs = viewer_model.get_outputs(bundle) - distances = 
outputs["expected_depth"].detach().cpu().numpy() - if min(distances) < 0.4: + + loss = -min(distances) + print(i, loss) + if loss > -0.4: position = position - directions[np.argmin(distances)] * 1 camera_path.add_camera( keyframe=Keyframe( position=position, wxyz=wxyz_helper(position), override_fov_enabled=False, - override_fov_rad=fov, + override_fov_rad=event.client.camera.fov, override_time_enabled=False, override_time_val=0.0, aspect=resolution.value[0] / resolution.value[1], From 719bea20ff2751abc6ce7559b7983e4429f4a907 Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Wed, 21 Aug 2024 19:02:56 -0700 Subject: [PATCH 09/12] clean --- nerfstudio/viewer/render_panel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 1877b6709f..12958732d3 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -1353,9 +1353,9 @@ def _(event: viser.GuiEvent) -> None: distances = outputs["expected_depth"].detach().cpu().numpy() loss = -min(distances) - print(i, loss) if loss > -0.4: position = position - directions[np.argmin(distances)] * 1 + # backprop through the nerf as the gradient step, input is position camera_path.add_camera( keyframe=Keyframe( position=position, From 9a6053127497ab0547bd0c4a36ac453569be2999 Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Fri, 27 Sep 2024 11:09:18 -0700 Subject: [PATCH 10/12] restructure major features out of nerfstudio, minor changes for usability --- nerfstudio/viewer/render_panel.py | 219 +----------------------------- nerfstudio/viewer/viewer.py | 156 +++++++++++++++------ 2 files changed, 122 insertions(+), 253 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 12958732d3..59cd36cf07 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -21,7 +21,7 @@ import threading import time from pathlib import Path -from typing import Dict, List, Literal, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union import numpy as np import splines @@ -33,6 +33,9 @@ from nerfstudio.models.base_model import Model from nerfstudio.viewer.control_panel import ControlPanel +if TYPE_CHECKING: + from viser import GuiInputHandle + @dataclasses.dataclass class Keyframe: @@ -523,7 +526,7 @@ def populate_render_tab( datapath: Path, viewer_model: Model, control_panel: Optional[ControlPanel] = None, -) -> RenderTabState: +) -> Tuple[RenderTabState, GuiInputHandle, GuiInputHandle, GuiInputHandle]: from nerfstudio.viewer.viewer import VISER_NERFSTUDIO_SCALE_RATIO render_tab_state = RenderTabState( @@ -1164,216 +1167,6 @@ def _(event: viser.GuiEvent) -> None: @close_button.on_click def _(_) -> None: modal.close() - - auto_camera_folder = server.gui.add_folder("Automatic Camera Path") - with auto_camera_folder: - click_position = np.array([0.0, 0.0, -5.0]) - select_center_button = server.gui.add_button( - "Select Center", - icon=viser.Icon.CROSSHAIR, - hint="Choose center point to generate camera path around.", - ) - - @select_center_button.on_click - def _(event: viser.GuiEvent) -> None: - select_center_button.disabled = True - - @event.client.scene.on_pointer_event(event_type="click") - def _(event: viser.ScenePointerEvent) -> None: - # Code mostly borrowed from garfield.studio! 
- import torch - from nerfstudio.cameras.rays import RayBundle - from nerfstudio.field_components.field_heads import FieldHeadNames - from nerfstudio.model_components.losses import scale_gradients_by_distance_squared - - origin = torch.tensor(event.ray_origin).view(1, 3) - direction = torch.tensor(event.ray_direction).view(1, 3) - - # Get intersection - bundle = RayBundle( - origins=origin, - directions=direction, - pixel_area=torch.tensor(0.001).view(1, 1), - camera_indices=torch.tensor(0).view(1, 1), - nears=torch.tensor(0.05).view(1, 1), - fars=torch.tensor(100).view(1, 1), - ).to("cuda") - - # Get the distance/depth to the intersection --> calculate 3D position of the click - ray_samples, _, _ = viewer_model.proposal_sampler(bundle, density_fns=viewer_model.density_fns) - field_outputs = viewer_model.field.forward(ray_samples, compute_normals=viewer_model.config.predict_normals) - if viewer_model.config.use_gradient_scaling: - field_outputs = scale_gradients_by_distance_squared(field_outputs, ray_samples) - weights = ray_samples.get_weights(field_outputs[FieldHeadNames.DENSITY]) - with torch.no_grad(): - depth = viewer_model.renderer_depth(weights=weights, ray_samples=ray_samples) - distance = depth[0, 0].detach().cpu().numpy() - - nonlocal click_position - click_position = np.array(origin + direction * distance).reshape(3,) - - server.scene.add_icosphere( - f"/render_center_pos", - radius=0.1, - color=(200, 10, 30), - position=click_position, - ) - - event.client.scene.remove_pointer_callback() - - @event.client.scene.on_pointer_callback_removed - def _(): - select_center_button.disabled = False - - num_cameras_handle = server.gui.add_number( - label="Number of Cameras", - initial_value=3, - hint="Total number of cameras generated in path, placed equidistant from neighboring ones.", - ) - - radius_handle = server.gui.add_number( - label="Radius", - initial_value=4, - hint="Radius of circular camera path.", - ) - - camera_height_handle = server.gui.add_number( - label="Height", - initial_value=2, - hint="Height of cameras with respect to chosen origin.", - ) - - circular_camera_path_button = server.gui.add_button( - "Generate Circular Camera Path", - icon=viser.Icon.CAMERA, - hint="Automatically generate a circular camera path around selected point.", - ) - - def wxyz_helper(camera_position: np.ndarray) -> np.ndarray: - # Calculates the camera direction from position to click_position - camera_direction = camera_position - click_position - camera_direction = camera_direction / np.linalg.norm(camera_direction) - - global_up = np.array([0.0, 0.0, 1.0]) - - camera_right = np.cross(camera_direction, global_up) - camera_right_norm = np.linalg.norm(camera_right) - if camera_right_norm > 0: - camera_right = camera_right / camera_right_norm - - camera_up = np.cross(camera_right, camera_direction) - - R = np.array([-camera_right, -camera_up, -camera_direction]).T - - w = np.sqrt(1 + R[0, 0] + R[1, 1] + R[2, 2]) / 2 - x = (R[2, 1] - R[1, 2]) / (4 * w) - y = (R[0, 2] - R[2, 0]) / (4 * w) - z = (R[1, 0] - R[0, 1]) / (4 * w) - return np.array([w, x, y, z]) - else: - return np.array([1.0, 0.0, 0.0, 0.0]) - - camera_coords = [] - @circular_camera_path_button.on_click - def _(event: viser.GuiEvent) -> None: - nonlocal click_position, num_cameras_handle, radius_handle, camera_height_handle, camera_coords - num_cameras = num_cameras_handle.value - radius = radius_handle.value - camera_height = camera_height_handle.value - - camera_coords = [] - for i in range(num_cameras): - 
camera_coords.append(click_position + - np.array([radius * np.cos(2 * np.pi * i / num_cameras), - radius * np.sin(2 * np.pi * i/ num_cameras), - camera_height])) - - fov = event.client.camera.fov - for i, position in enumerate(camera_coords): - camera_path.add_camera( - keyframe=Keyframe( - position=position, - wxyz=wxyz_helper(position), - override_fov_enabled=False, - override_fov_rad=fov, - override_time_enabled=False, - override_time_val=0.0, - aspect=resolution.value[0] / resolution.value[1], - override_transition_enabled=False, - override_transition_sec=None, - ), - keyframe_index = i, - ) - duration_number.value = camera_path.compute_duration() - camera_path.update_spline() - - optimize_button = server.gui.add_button( - "Optimize Camera Path", - icon=viser.Icon.CAMERA, - hint="Optimizes camera path for object avoidance iteratively.", - ) - - @optimize_button.on_click - def _(event: viser.GuiEvent) -> None: - import torch - from nerfstudio.cameras.rays import RayBundle - from nerfstudio.field_components.field_heads import FieldHeadNames - from nerfstudio.model_components.losses import scale_gradients_by_distance_squared - - nonlocal camera_coords - - directions = [[1, 0, 0], - [-1, 0, 0], - [0, 1, 0], - [0, -1, 0], - [0, 0, 1], - [0, 0, -1]] - - for i, position in enumerate(camera_coords): - raylen = 2.0 - origins = torch.tensor(np.tile(position, (6, 1))) - pixel_area = torch.ones_like(origins[..., 0:1]) - camera_indices = torch.zeros_like(origins[..., 0:1]).int() - nears = torch.zeros_like(origins[..., 0:1]) - fars = torch.ones_like(origins[..., 0:1]) * raylen - directions_norm = torch.ones_like(origins[..., 0:1]) - viewer_model.training = False - - bundle = RayBundle( - origins=origins, - directions=torch.tensor(directions), - pixel_area=pixel_area, - camera_indices=camera_indices, - nears=nears, - fars=fars, - metadata={"directions_norm": directions_norm}, - ).to('cuda') - - outputs = viewer_model.get_outputs(bundle) - distances = outputs["expected_depth"].detach().cpu().numpy() - - loss = -min(distances) - if loss > -0.4: - position = position - directions[np.argmin(distances)] * 1 - # backprop through the nerf as the gradient step, input is position - camera_path.add_camera( - keyframe=Keyframe( - position=position, - wxyz=wxyz_helper(position), - override_fov_enabled=False, - override_fov_rad=event.client.camera.fov, - override_time_enabled=False, - override_time_val=0.0, - aspect=resolution.value[0] / resolution.value[1], - override_transition_enabled=False, - override_transition_sec=None, - ), - keyframe_index = i, - ) - duration_number.value = camera_path.compute_duration() - camera_path.update_spline() - - camera_coords[i] = position if control_panel is not None: camera_path = CameraPath(server, duration_number, control_panel._time_enabled) @@ -1383,7 +1176,7 @@ def _(event: viser.GuiEvent) -> None: camera_path.default_fov = fov_degrees.value / 180.0 * np.pi camera_path.default_transition_sec = transition_sec_number.value - return render_tab_state + return render_tab_state, camera_path, duration_number, resolution if __name__ == "__main__": diff --git a/nerfstudio/viewer/viewer.py b/nerfstudio/viewer/viewer.py index 4137dd44ef..1ca514d73b 100644 --- a/nerfstudio/viewer/viewer.py +++ b/nerfstudio/viewer/viewer.py @@ -94,7 +94,9 @@ def __init__( self.include_time = self.pipeline.datamanager.includes_time if self.config.websocket_port is None: - websocket_port = viewer_utils.get_free_port(default_port=self.config.websocket_port_default) + websocket_port = 
viewer_utils.get_free_port( + default_port=self.config.websocket_port_default + ) else: websocket_port = self.config.websocket_port self.log_filename.parent.mkdir(exist_ok=True) @@ -106,12 +108,16 @@ def __init__( self.train_btn_state: Literal["training", "paused", "completed"] = ( "training" if self.trainer is None else self.trainer.training_state ) - self._prev_train_state: Literal["training", "paused", "completed"] = self.train_btn_state + self._prev_train_state: Literal["training", "paused", "completed"] = ( + self.train_btn_state + ) self.last_move_time = 0 # track the camera index that last being clicked self.current_camera_idx = 0 - self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port) + self.viser_server = viser.ViserServer( + host=config.websocket_host, port=websocket_port + ) # Set the name of the URL either to the share link if available, or the localhost share_url = None if share: @@ -120,15 +126,21 @@ def __init__( print("Couldn't make share URL!") if share_url is not None: - self.viewer_info = [f"Viewer at: http://localhost:{websocket_port} or {share_url}"] + self.viewer_info = [ + f"Viewer at: http://localhost:{websocket_port} or {share_url}" + ] elif config.websocket_host == "0.0.0.0": # 0.0.0.0 is not a real IP address and was confusing people, so # we'll just print localhost instead. There are some security # (and IPv6 compatibility) implications here though, so we should # note that the server is bound to 0.0.0.0! - self.viewer_info = [f"Viewer running locally at: http://localhost:{websocket_port} (listening on 0.0.0.0)"] + self.viewer_info = [ + f"Viewer running locally at: http://localhost:{websocket_port} (listening on 0.0.0.0)" + ] else: - self.viewer_info = [f"Viewer running locally at: http://{config.websocket_host}:{websocket_port}"] + self.viewer_info = [ + f"Viewer running locally at: http://{config.websocket_host}:{websocket_port}" + ] buttons = ( viser.theme.TitlebarButton( @@ -195,8 +207,8 @@ def __init__( self.show_images.visible = False mkdown = self.make_stats_markdown(0, "0x0px") self.stats_markdown = self.viser_server.gui.add_markdown(mkdown) - tabs = self.viser_server.gui.add_tab_group() - control_tab = tabs.add_tab("Control", viser.Icon.SETTINGS) + self.tabs = self.viser_server.gui.add_tab_group() + control_tab = self.tabs.add_tab("Control", viser.Icon.SETTINGS) with control_tab: self.control_panel = ControlPanel( self.viser_server, @@ -209,13 +221,24 @@ def __init__( ) config_path = self.log_filename.parents[0] / "config.yml" - with tabs.add_tab("Render", viser.Icon.CAMERA): - self.render_tab_state = populate_render_tab( - self.viser_server, config_path, self.datapath, self.pipeline.model, self.control_panel + with self.tabs.add_tab("Render", viser.Icon.CAMERA): + ( + self.render_tab_state, + self.camera_path, + self.duration_number, + self.resolution, + ) = populate_render_tab( + self.viser_server, + config_path, + self.datapath, + self.pipeline.model, + self.control_panel, ) - with tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT): - populate_export_tab(self.viser_server, self.control_panel, config_path, self.pipeline.model) + with self.tabs.add_tab("Export", viser.Icon.PACKAGE_EXPORT): + populate_export_tab( + self.viser_server, self.control_panel, config_path, self.pipeline.model + ) # Keep track of the pointers to generated GUI folders, because each generated folder holds a unique ID. viewer_gui_folders = dict() @@ -225,17 +248,26 @@ def prev_cb_wrapper(prev_cb): # concurrently executing render thread. 
This may block rendering, however this can be necessary # if the callback uses get_outputs internally. def cb_lock(element): - with self.train_lock if self.train_lock is not None else contextlib.nullcontext(): + with ( + self.train_lock + if self.train_lock is not None + else contextlib.nullcontext() + ): prev_cb(element) return cb_lock - def nested_folder_install(folder_labels: List[str], prev_labels: List[str], element: ViewerElement): + def nested_folder_install( + folder_labels: List[str], prev_labels: List[str], element: ViewerElement + ): if len(folder_labels) == 0: element.install(self.viser_server) # also rewire the hook to rerender prev_cb = element.cb_hook - element.cb_hook = lambda element: [prev_cb_wrapper(prev_cb)(element), self._trigger_rerender()] + element.cb_hook = lambda element: [ + prev_cb_wrapper(prev_cb)(element), + self._trigger_rerender(), + ] else: # recursively create folders # If the folder name is "Custom Elements/a/b", then: @@ -251,12 +283,18 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem # Otherwise, use the existing folder as context manager. folder_path = "/".join(prev_labels + [folder_labels[0]]) if folder_path not in viewer_gui_folders: - viewer_gui_folders[folder_path] = self.viser_server.gui.add_folder(folder_labels[0]) + viewer_gui_folders[folder_path] = self.viser_server.gui.add_folder( + folder_labels[0] + ) with viewer_gui_folders[folder_path]: - nested_folder_install(folder_labels[1:], prev_labels + [folder_labels[0]], element) + nested_folder_install( + folder_labels[1:], prev_labels + [folder_labels[0]], element + ) with control_tab: - from nerfstudio.viewer_legacy.server.viewer_elements import ViewerElement as LegacyViewerElement + from nerfstudio.viewer_legacy.server.viewer_elements import ( + ViewerElement as LegacyViewerElement, + ) if len(parse_object(pipeline, LegacyViewerElement, "Custom Elements")) > 0: from nerfstudio.utils.rich_utils import CONSOLE @@ -266,7 +304,9 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem style="bold yellow", ) self.viewer_elements = [] - self.viewer_elements.extend(parse_object(pipeline, ViewerElement, "Custom Elements")) + self.viewer_elements.extend( + parse_object(pipeline, ViewerElement, "Custom Elements") + ) for param_path, element in self.viewer_elements: folder_labels = param_path.split("/")[:-1] nested_folder_install(folder_labels, [], element) @@ -283,7 +323,8 @@ def nested_folder_install(folder_labels: List[str], prev_labels: List[str], elem if isinstance(pipeline.model, SplatfactoModel): self.viser_server.scene.add_point_cloud( "/gaussian_splatting_initial_points", - points=pipeline.model.means.numpy(force=True) * VISER_NERFSTUDIO_SCALE_RATIO, + points=pipeline.model.means.numpy(force=True) + * VISER_NERFSTUDIO_SCALE_RATIO, colors=(255, 0, 0), point_size=0.01, point_shape="circle", @@ -318,7 +359,10 @@ def get_camera_state(self, client: viser.ClientHandle) -> CameraState: R = vtf.SO3(wxyz=client.camera.wxyz) R = R @ vtf.SO3.from_x_radians(np.pi) R = torch.tensor(R.as_matrix()) - pos = torch.tensor(client.camera.position, dtype=torch.float64) / VISER_NERFSTUDIO_SCALE_RATIO + pos = ( + torch.tensor(client.camera.position, dtype=torch.float64) + / VISER_NERFSTUDIO_SCALE_RATIO + ) c2w = torch.concatenate([R, pos[:, None]], dim=1) if self.ready and self.render_tab_state.preview_render: camera_type = self.render_tab_state.preview_camera_type @@ -327,13 +371,19 @@ def get_camera_state(self, client: viser.ClientHandle) -> CameraState: 
aspect=self.render_tab_state.preview_aspect, c2w=c2w, time=self.render_tab_state.preview_time, - camera_type=CameraType.PERSPECTIVE - if camera_type == "Perspective" - else CameraType.FISHEYE - if camera_type == "Fisheye" - else CameraType.EQUIRECTANGULAR - if camera_type == "Equirectangular" - else assert_never(camera_type), + camera_type=( + CameraType.PERSPECTIVE + if camera_type == "Perspective" + else ( + CameraType.FISHEYE + if camera_type == "Fisheye" + else ( + CameraType.EQUIRECTANGULAR + if camera_type == "Equirectangular" + else assert_never(camera_type) + ) + ) + ), idx=self.current_camera_idx, ) else: @@ -351,7 +401,9 @@ def handle_disconnect(self, client: viser.ClientHandle) -> None: self.render_statemachines.pop(client.client_id) def handle_new_client(self, client: viser.ClientHandle) -> None: - self.render_statemachines[client.client_id] = RenderStateMachine(self, VISER_NERFSTUDIO_SCALE_RATIO, client) + self.render_statemachines[client.client_id] = RenderStateMachine( + self, VISER_NERFSTUDIO_SCALE_RATIO, client + ) self.render_statemachines[client.client_id].start() @client.camera.on_update @@ -361,7 +413,9 @@ def _(_: viser.CameraHandle) -> None: self.last_move_time = time.time() with self.viser_server.atomic(): camera_state = self.get_camera_state(client) - self.render_statemachines[client.client_id].action(RenderAction("move", camera_state)) + self.render_statemachines[client.client_id].action( + RenderAction("move", camera_state) + ) def set_camera_visibility(self, visible: bool) -> None: """Toggle the visibility of the training cameras.""" @@ -382,15 +436,23 @@ def update_camera_poses(self): idxs = list(self.camera_handles.keys()) with torch.no_grad(): assert isinstance(camera_optimizer, CameraOptimizer) - c2ws_delta = camera_optimizer(torch.tensor(idxs, device=camera_optimizer.device)).cpu().numpy() + c2ws_delta = ( + camera_optimizer(torch.tensor(idxs, device=camera_optimizer.device)) + .cpu() + .numpy() + ) for i, key in enumerate(idxs): # both are numpy arrays c2w_orig = self.original_c2w[key] c2w_delta = c2ws_delta[i, ...] 
- c2w = c2w_orig @ np.concatenate((c2w_delta, np.array([[0, 0, 0, 1]])), axis=0) + c2w = c2w_orig @ np.concatenate( + (c2w_delta, np.array([[0, 0, 0, 1]])), axis=0 + ) R = vtf.SO3.from_matrix(c2w[:3, :3]) # type: ignore R = R @ vtf.SO3.from_x_radians(np.pi) - self.camera_handles[key].position = c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO + self.camera_handles[key].position = ( + c2w[:3, 3] * VISER_NERFSTUDIO_SCALE_RATIO + ) self.camera_handles[key].wxyz = R.wxyz def _trigger_rerender(self) -> None: @@ -430,7 +492,9 @@ def _pick_drawn_image_idxs(self, total_num: int) -> list[int]: else: num_display_images = min(self.config.max_num_display_images, total_num) # draw indices, roughly evenly spaced - return np.linspace(0, total_num - 1, num_display_images, dtype=np.int32).tolist() + return np.linspace( + 0, total_num - 1, num_display_images, dtype=np.int32 + ).tolist() def init_scene( self, @@ -474,7 +538,9 @@ def init_scene( ) def create_on_click_callback(capture_idx): - def on_click_callback(event: viser.SceneNodePointerEvent[viser.CameraFrustumHandle]) -> None: + def on_click_callback( + event: viser.SceneNodePointerEvent[viser.CameraFrustumHandle], + ) -> None: with event.client.atomic(): event.client.camera.position = event.target.position event.client.camera.wxyz = event.target.wxyz @@ -504,12 +570,18 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N # this stops training while moving to make the response smoother while time.time() - self.last_move_time < 0.1: time.sleep(0.05) - if self.trainer is not None and self.trainer.training_state == "training" and self.train_util != 1: + if ( + self.trainer is not None + and self.trainer.training_state == "training" + and self.train_util != 1 + ): if ( EventName.TRAIN_RAYS_PER_SEC.value in GLOBAL_BUFFER["events"] and EventName.VIS_RAYS_PER_SEC.value in GLOBAL_BUFFER["events"] ): - train_s = GLOBAL_BUFFER["events"][EventName.TRAIN_RAYS_PER_SEC.value]["avg"] + train_s = GLOBAL_BUFFER["events"][EventName.TRAIN_RAYS_PER_SEC.value][ + "avg" + ] vis_s = GLOBAL_BUFFER["events"][EventName.VIS_RAYS_PER_SEC.value]["avg"] train_util = self.train_util vis_n = self.control_panel.max_res**2 @@ -517,7 +589,9 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N train_time = train_n / train_s vis_time = vis_n / vis_s - render_freq = train_util * vis_time / (train_time - train_util * train_time) + render_freq = ( + train_util * vis_time / (train_time - train_util * train_time) + ) else: render_freq = 30 if step > self.last_step + render_freq: @@ -526,7 +600,9 @@ def update_scene(self, step: int, num_rays_per_batch: Optional[int] = None) -> N for id in clients: camera_state = self.get_camera_state(clients[id]) if camera_state is not None: - self.render_statemachines[id].action(RenderAction("step", camera_state)) + self.render_statemachines[id].action( + RenderAction("step", camera_state) + ) self.update_camera_poses() self.update_step(step) From cea20a73f33aa754a37d1a4cc85e3bf829275107 Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Fri, 27 Sep 2024 11:25:10 -0700 Subject: [PATCH 11/12] minor changes --- nerfstudio/viewer/render_panel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index 59cd36cf07..cf05c59c6c 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -526,7 +526,7 @@ def populate_render_tab( datapath: Path, viewer_model: Model, control_panel: Optional[ControlPanel] = 
None, -) -> Tuple[RenderTabState, GuiInputHandle, GuiInputHandle, GuiInputHandle]: +) -> Tuple[RenderTabState, CameraPath, GuiInputHandle, GuiInputHandle]: from nerfstudio.viewer.viewer import VISER_NERFSTUDIO_SCALE_RATIO render_tab_state = RenderTabState( From 8bed243a33b0d89df6d51d5e8594b4022176050c Mon Sep 17 00:00:00 2001 From: Gina Wu Date: Thu, 10 Oct 2024 03:01:19 -0700 Subject: [PATCH 12/12] init keyframe value capture for image embeddings --- nerfstudio/viewer/render_panel.py | 53 +++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/nerfstudio/viewer/render_panel.py b/nerfstudio/viewer/render_panel.py index cf05c59c6c..288a9b2e2b 100644 --- a/nerfstudio/viewer/render_panel.py +++ b/nerfstudio/viewer/render_panel.py @@ -20,6 +20,8 @@ import json import threading import time +import torch + from pathlib import Path from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union @@ -30,7 +32,10 @@ import viser.transforms as tf from scipy import interpolate +from nerfstudio.cameras.cameras import Cameras, CameraType, RayBundle +from nerfstudio.cameras.camera_utils import quaternion_matrix from nerfstudio.models.base_model import Model +from nerfstudio.utils import colormaps from nerfstudio.viewer.control_panel import ControlPanel if TYPE_CHECKING: @@ -66,9 +71,14 @@ def from_camera(camera: viser.CameraHandle, aspect: float) -> Keyframe: class CameraPath: def __init__( - self, server: viser.ViserServer, duration_element: viser.GuiInputHandle[float], time_enabled: bool = False + self, + server: viser.ViserServer, + duration_element: viser.GuiInputHandle[float], + viewer_model: Model, + time_enabled: bool = False ): self._server = server + self._model = viewer_model self._keyframes: Dict[int, Tuple[Keyframe, viser.CameraFrustumHandle]] = {} self._keyframe_counter: int = 0 self._spline_nodes: List[viser.SceneNodeHandle] = [] @@ -167,6 +177,7 @@ def _(_) -> None: delete_button = server.gui.add_button("Delete", color="red", icon=viser.Icon.TRASH) go_to_button = server.gui.add_button("Go to") + capture_button = server.gui.add_button("Capture") close_button = server.gui.add_button("Close") @override_fov.on_update @@ -232,6 +243,42 @@ def _(event: viser.GuiEvent) -> None: client.camera.wxyz = T_world_set.rotation().wxyz client.camera.position = T_world_set.translation() time.sleep(1.0 / 30.0) + + @capture_button.on_click + def _(event: viser.GuiEvent) -> None: + R = quaternion_matrix(keyframe.wxyz) + forward_vector = np.array([0, 0, 1, 1]) + direction = R @ forward_vector + direction = [direction[:3]] + + image_height = 607.0 + image_width = 1060.0 + + from nerfstudio.viewer.viewer import VISER_NERFSTUDIO_SCALE_RATIO + + c2w = tf.SE3.from_rotation_and_translation( + tf.SO3(keyframe.wxyz) @ tf.SO3.from_x_radians(np.pi), + keyframe.position / VISER_NERFSTUDIO_SCALE_RATIO, + ).as_matrix() + c2w = torch.tensor(c2w[:3]) + + camera = Cameras( + fx=image_width / 2, + fy=image_height, + cx=image_width / 2, + cy=image_height / 2, + camera_to_worlds=c2w, + camera_type=CameraType.PERSPECTIVE, + times=None, + ) + + self._model.training = False + outputs = self._model.get_outputs_for_camera(camera) + + from PIL import Image + _im = outputs['rgb'].detach().numpy() + im = Image.fromarray((_im * 255).astype(np.uint8)) + im.save("capture.png") @close_button.on_click def _(_) -> None: @@ -1169,9 +1216,9 @@ def _(_) -> None: modal.close() if control_panel is not None: - camera_path = CameraPath(server, duration_number, control_panel._time_enabled) + 
camera_path = CameraPath(server, duration_number, viewer_model, control_panel._time_enabled) else: - camera_path = CameraPath(server, duration_number) + camera_path = CameraPath(server, duration_number, viewer_model) camera_path.tension = tension_slider.value camera_path.default_fov = fov_degrees.value / 180.0 * np.pi camera_path.default_transition_sec = transition_sec_number.value
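
---

Note (editor): the patches above revolve around two pieces of geometry: placing keyframes on a circle around a clicked center point, and converting a look-at rotation into the (w, x, y, z) quaternion that `Keyframe` expects. The sketch below restates that math as standalone NumPy for readability, independent of viser/nerfstudio; the function names are illustrative, and the exact sign/axis convention of the camera frame (which the patches themselves revise between commits 01 and 06) is not asserted here.

```python
# Hedged sketch of the circular-path geometry used in the patches above.
# Pure NumPy, no nerfstudio/viser imports; names and conventions are illustrative.
import numpy as np


def look_at_wxyz(camera_position: np.ndarray, target: np.ndarray) -> np.ndarray:
    """Quaternion (w, x, y, z) for a camera at `camera_position` looking at `target`."""
    # Unit vector from the target to the camera.
    direction = camera_position - target
    direction = direction / np.linalg.norm(direction)

    global_up = np.array([0.0, 0.0, 1.0])
    right = np.cross(global_up, direction)
    right_norm = np.linalg.norm(right)
    if right_norm < 1e-8:
        # Degenerate case: camera directly above or below the target.
        return np.array([1.0, 0.0, 0.0, 0.0])
    right = right / right_norm
    up = np.cross(direction, right)

    # Columns are the camera axes expressed in world coordinates (right, up, back);
    # which axis plays "forward" differs between conventions, so treat this as a sketch.
    R = np.stack([right, up, direction], axis=1)

    # Standard rotation-matrix -> quaternion conversion (assumes trace > -1,
    # which holds for the orientations produced here).
    w = np.sqrt(1.0 + R[0, 0] + R[1, 1] + R[2, 2]) / 2.0
    x = (R[2, 1] - R[1, 2]) / (4.0 * w)
    y = (R[0, 2] - R[2, 0]) / (4.0 * w)
    z = (R[1, 0] - R[0, 1]) / (4.0 * w)
    return np.array([w, x, y, z])


def circular_keyframe_positions(center: np.ndarray, radius: float, height: float, n: int) -> np.ndarray:
    """`n` equally spaced positions on a circle of `radius`, offset `height` above `center`."""
    angles = 2.0 * np.pi * np.arange(n) / n
    offsets = np.stack(
        [radius * np.cos(angles), radius * np.sin(angles), np.full(n, float(height))],
        axis=1,
    )
    return center + offsets
```

Calling `look_at_wxyz(p, center)` for each row `p` of `circular_keyframe_positions(center, radius, height, n)` roughly mirrors the keyframe-placement loop that the "Generate Circular Camera Path" button builds in commits 04-07, minus the `Keyframe`/`camera_path.add_camera` wiring that is specific to the render panel.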