From 70ade42e46f3dcf0e89e4efc445650f6d6525673 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 17:11:36 -0700 Subject: [PATCH 01/38] fix jittering in markdown in viewer beta --- nerfstudio/viewer_beta/control_panel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index 07f556dd46..d93d335468 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -270,10 +270,9 @@ def update_step(self, step): step: the train step to set the model to """ with self.viser_server.atomic(), self.stat_folder: - # TODO change to a .value call instead of remove() and add, this makes it jittery with self.viser_server.atomic(): self.markdown.remove() - self.markdown = self.viser_server.add_gui_markdown(f"Step: {step}") + self.markdown.content = f"Step: {step}" def update_output_options(self, new_options: List[str]): """ From 622342cdceaa2b01ffc7518c2bf8542c687f5a23 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 17:12:21 -0700 Subject: [PATCH 02/38] Revert "fix jittering in markdown in viewer beta" This reverts commit 70ade42e46f3dcf0e89e4efc445650f6d6525673. --- nerfstudio/viewer_beta/control_panel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index d93d335468..07f556dd46 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -270,9 +270,10 @@ def update_step(self, step): step: the train step to set the model to """ with self.viser_server.atomic(), self.stat_folder: + # TODO change to a .value call instead of remove() and add, this makes it jittery with self.viser_server.atomic(): self.markdown.remove() - self.markdown.content = f"Step: {step}" + self.markdown = self.viser_server.add_gui_markdown(f"Step: {step}") def update_output_options(self, new_options: List[str]): """ From 8a08740aedeac29d2ca168b900f79c2235275864 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Sun, 29 Oct 2023 16:31:59 -0700 Subject: [PATCH 03/38] print correctly formatted url in banner for viewer beta --- nerfstudio/viewer_beta/viewer.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index ae8ce3ae77..2ba3693b83 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -95,8 +95,6 @@ def __init__( websocket_port = self.config.websocket_port self.log_filename.parent.mkdir(exist_ok=True) - self.viewer_url = viewer_utils.get_viewer_url(websocket_port) - # viewer specific variables self.output_type_changed = True self.output_split_type_changed = True @@ -106,6 +104,19 @@ def __init__( self.last_move_time = 0 self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share) + # Set the name of the URL either to the share link if available, or the localhost + if share: + assert self.viser_server._share_tunnel is not None + while self.viser_server._share_tunnel._shared_state["status"] == "connecting": + # wait for connection before grabbing URL + time.sleep(0.01) + url_maybe = self.viser_server._share_tunnel.get_url() + if url_maybe is not None: + self.viewer_url = url_maybe + else: + self.viewer_url = f"http://{config.websocket_host}:{websocket_port}" + else: + self.viewer_url = f"http://{config.websocket_host}:{websocket_port}" buttons = ( 
viser.theme.TitlebarButton( text="Getting Started", From 09cc1d12a256377dab6c7c8fb532585fdf5dd45b Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 20:07:57 -0800 Subject: [PATCH 04/38] fix the bug when camera.distortion_params is None --- nerfstudio/data/datamanagers/full_images_datamanager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 35837f05a1..7d401e1ea3 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -129,6 +129,7 @@ def cache_images(self, cache_images_option): camera = self.train_dataset.cameras[i].reshape(()) K = camera.get_intrinsics_matrices().numpy() if camera.distortion_params is None: + cached_train.append(data) continue distortion_params = camera.distortion_params.numpy() image = data["image"].numpy() @@ -203,6 +204,7 @@ def cache_images(self, cache_images_option): camera = self.eval_dataset.cameras[i].reshape(()) K = camera.get_intrinsics_matrices().numpy() if camera.distortion_params is None: + cached_eval.append(data) continue distortion_params = camera.distortion_params.numpy() image = data["image"].numpy() From a4e640b274ed057a3754d596114d4fa839ea840f Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 22:37:13 -0800 Subject: [PATCH 05/38] Handle background color override when using blender. --- .../data/datamanagers/full_images_datamanager.py | 4 ++-- nerfstudio/data/dataparsers/blender_dataparser.py | 11 ++++++----- nerfstudio/model_components/renderers.py | 3 +-- nerfstudio/models/gaussian_splatting.py | 12 ++++++------ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 7d401e1ea3..333a8d4c79 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -60,7 +60,7 @@ class FullImageDatamanagerConfig(DataManagerConfig): eval_image_indices: Optional[Tuple[int, ...]] = (0,) """Specifies the image indices to use during eval; if None, uses all.""" cache_images: Literal["no-cache", "cpu", "gpu"] = "cpu" - """Whether to cache images in memory. If "numpy", caches as numpy arrays, if "torch", caches as torch tensors.""" + """Whether to cache images in memory. 
If "gpu", images are cached on the GPU.""" class FullImageDatamanager(DataManager, Generic[TDataset]): @@ -104,7 +104,7 @@ def __init__( self.train_dataset = self.create_train_dataset() self.eval_dataset = self.create_eval_dataset() if len(self.train_dataset) > 500 and self.config.cache_images == "gpu": - CONSOLE.print("Train dataset has over 500 images, overriding cach_images to cpu", style="bold yellow") + CONSOLE.print("Train dataset has over 500 images, overriding cache_images to cpu", style="bold yellow") self.config.cache_images = "cpu" self.cached_train, self.cached_eval = self.cache_images(self.config.cache_images) self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device diff --git a/nerfstudio/data/dataparsers/blender_dataparser.py b/nerfstudio/data/dataparsers/blender_dataparser.py index 9d6524d4e0..0a207c9694 100644 --- a/nerfstudio/data/dataparsers/blender_dataparser.py +++ b/nerfstudio/data/dataparsers/blender_dataparser.py @@ -32,6 +32,7 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.utils.colors import get_color from nerfstudio.utils.io import load_from_json +from nerfstudio.model_components.renderers import background_color_override_context @dataclass @@ -61,13 +62,13 @@ def __init__(self, config: BlenderDataParserConfig): self.data: Path = config.data self.scale_factor: float = config.scale_factor self.alpha_color = config.alpha_color - - def _generate_dataparser_outputs(self, split="train"): if self.alpha_color is not None: - alpha_color_tensor = get_color(self.alpha_color) + self.alpha_color_tensor = get_color(self.alpha_color) + background_color_override_context(self.alpha_color_tensor) else: - alpha_color_tensor = None + self.alpha_color_tensor = None + def _generate_dataparser_outputs(self, split="train"): meta = load_from_json(self.data / f"transforms_{split}.json") image_filenames = [] poses = [] @@ -102,7 +103,7 @@ def _generate_dataparser_outputs(self, split="train"): dataparser_outputs = DataparserOutputs( image_filenames=image_filenames, cameras=cameras, - alpha_color=alpha_color_tensor, + alpha_color=self.alpha_color_tensor, scene_box=scene_box, dataparser_scale=self.scale_factor, ) diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index 1fde0d693c..32329b2aca 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -50,8 +50,7 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non old_background_color = BACKGROUND_COLOR_OVERRIDE try: BACKGROUND_COLOR_OVERRIDE = mode - yield - finally: + except: BACKGROUND_COLOR_OVERRIDE = old_background_color diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index 91686b44a2..ce35cd20a1 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -533,14 +533,14 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: if self.training: # currently relies on the branch vickie/camera-grads self.camera_optimizer.apply_to_camera(camera) - if self.training: + # get the background color + if renderers.BACKGROUND_COLOR_OVERRIDE is not None: + background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device) + elif self.training: background = torch.rand(3, device=self.device) else: - # logic for setting the background of the scene - if renderers.BACKGROUND_COLOR_OVERRIDE is not None: - background = renderers.BACKGROUND_COLOR_OVERRIDE - else: - background = 
self.back_color.to(self.device) + background = self.back_color.to(self.device) + if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: From bd07a71977caf748d230ed9f8f97112eea8ae158 Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 22:44:44 -0800 Subject: [PATCH 06/38] fix bare except --- nerfstudio/model_components/renderers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index 32329b2aca..3492c94681 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -50,8 +50,10 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non old_background_color = BACKGROUND_COLOR_OVERRIDE try: BACKGROUND_COLOR_OVERRIDE = mode - except: + yield + except Exception: BACKGROUND_COLOR_OVERRIDE = old_background_color + raise (Exception) class RGBRenderer(nn.Module): From 2fcd55617c8e6b272c0e6dd06fa64f5e4c908188 Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 22:53:29 -0800 Subject: [PATCH 07/38] format --- nerfstudio/models/gaussian_splatting.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index ce35cd20a1..850105543a 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -536,11 +536,12 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: # get the background color if renderers.BACKGROUND_COLOR_OVERRIDE is not None: background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device) - elif self.training: - background = torch.rand(3, device=self.device) else: - background = self.back_color.to(self.device) - + if self.training: + background = torch.rand(3, device=self.device) + else: + background = self.back_color.to(self.device) + if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: From 00d10a170b267ab3a546349f6a972ec56e0b4739 Mon Sep 17 00:00:00 2001 From: xucr Date: Mon, 25 Dec 2023 00:50:54 -0800 Subject: [PATCH 08/38] Update background color override in Blender dataparser --- nerfstudio/data/dataparsers/blender_dataparser.py | 4 ++-- nerfstudio/model_components/renderers.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/nerfstudio/data/dataparsers/blender_dataparser.py b/nerfstudio/data/dataparsers/blender_dataparser.py index 0a207c9694..d8172579c7 100644 --- a/nerfstudio/data/dataparsers/blender_dataparser.py +++ b/nerfstudio/data/dataparsers/blender_dataparser.py @@ -32,7 +32,7 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.utils.colors import get_color from nerfstudio.utils.io import load_from_json -from nerfstudio.model_components.renderers import background_color_override_context +from nerfstudio.model_components.renderers import force_background_color_override @dataclass @@ -64,7 +64,7 @@ def __init__(self, config: BlenderDataParserConfig): self.alpha_color = config.alpha_color if self.alpha_color is not None: self.alpha_color_tensor = get_color(self.alpha_color) - background_color_override_context(self.alpha_color_tensor) + force_background_color_override(self.alpha_color_tensor) else: self.alpha_color_tensor = None diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index 
3492c94681..bbcca06624 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -51,9 +51,14 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non try: BACKGROUND_COLOR_OVERRIDE = mode yield - except Exception: + finally: BACKGROUND_COLOR_OVERRIDE = old_background_color - raise (Exception) + + +def force_background_color_override(mode: Float[Tensor, "3"]) -> None: + """Force background color override.""" + global BACKGROUND_COLOR_OVERRIDE + BACKGROUND_COLOR_OVERRIDE = mode class RGBRenderer(nn.Module): From 2158946580a43ca40645c634ad52b93c665facf4 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 7 Dec 2023 17:05:28 -0500 Subject: [PATCH 09/38] Add ability to download EyefulTower dataset --- nerfstudio/scripts/downloads/download_data.py | 118 ++++++++++++++++-- pyproject.toml | 4 +- 2 files changed, 110 insertions(+), 12 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 9560beba8d..1be76d6c7b 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -22,15 +22,16 @@ import zipfile from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Tuple, Union +import awscli.clidriver import gdown import torch import tyro -from nerfstudio.process_data import process_data_utils from typing_extensions import Annotated from nerfstudio.configs.base_config import PrintableConfig +from nerfstudio.process_data import process_data_utils from nerfstudio.utils import install_checks from nerfstudio.utils.scripts import run_command @@ -545,6 +546,108 @@ def download(self, save_dir: Path) -> None: shutil.rmtree(target_path / "val") +eyefultower_downloads = [ + "all", + "apartment", + "kitchen", + "office1a", + "office1b", + "office2", + "office_view1", + "office_view2", + "riverview", + "seating_area", + "table", + "workshop", +] + +eyefultower_resolutions = { + "all": None, + "jpeg_2k": "images-jpeg-2k", + "jpeg_4k": "images-jpeg-4k", + "jpeg_8k": "images-jpeg", + "exr_2k": "images-2k", +} + +if TYPE_CHECKING: + EyefulTowerCaptureName = str + EyefulTowerResolution = str +else: + EyefulTowerCaptureName = tyro.extras.literal_type_from_choices(eyefultower_downloads) + EyefulTowerResolution = tyro.extras.literal_type_from_choices(eyefultower_resolutions.keys()) + + +@dataclass +class EyefulTowerDownload(DatasetDownload): + """Download the EyefulTower dataset. + + Use the --help flag with the `eyefultower` subcommand to see all available datasets. + Find more information about the dataset at https://github.com/facebookresearch/EyefulTower. + """ + + capture_name: Tuple[EyefulTowerCaptureName, ...] = () + resolution_name: Tuple[EyefulTowerResolution, ...] 
= () + + def download(self, save_dir: Path): + if len(self.capture_name) == 0: + self.capture_name = ("seating_area",) + print( + f"No capture specified, using {self.capture_name} by default.", + "Add `--help` to this command to see all available captures.", + ) + + if len(self.resolution_name) == 0: + self.resolution_name = ("jpeg_2k",) + print( + f"No resolution specified, using {self.resolution_name} by default.", + "Add `--help` to this command to see all available resolutions.", + ) + + captures = set() + for capture in self.capture_name: + if capture == "all": + captures.update([c for c in eyefultower_downloads if c != "all"]) + else: + captures.add(capture) + captures = sorted(captures) + if len(captures) == 0: + print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.") + + resolutions = set() + for resolution in self.resolution_name: + if resolution == "all": + resolutions.update([r for r in eyefultower_resolutions.keys() if r != "all"]) + else: + resolutions.add(resolution) + resolutions = sorted(resolutions) + if len(resolutions) == 0: + print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.") + + driver = awscli.clidriver.create_clidriver() + + for i, capture in enumerate(captures): + base_url = f"s3://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15/EyefulTower/{capture}/" + output_path = save_dir / "eyefultower" / capture + includes = [] + for resolution in resolutions: + includes.extend(["--include", f"{eyefultower_resolutions[resolution]}/*"]) + command = ( + ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"] + + includes + + [base_url, str(output_path)] + ) + print( + f"[Capture {i+1: >2d}/{len(captures)}]:", + f"Downloading resolutions {resolutions} from EyefulTower capture '{capture}'", + f"to '{output_path.resolve()}' with command `aws {' '.join(command)}`", + "...", + end=" ", + flush=True, + ) + driver.main(command) + print("done!") + + Commands = Union[ Annotated[BlenderDownload, tyro.conf.subcommand(name="blender")], Annotated[Sitcoms3DDownload, tyro.conf.subcommand(name="sitcoms3d")], @@ -555,6 +658,7 @@ def download(self, save_dir: Path) -> None: Annotated[SDFstudioDemoDownload, tyro.conf.subcommand(name="sdfstudio")], Annotated[NeRFOSRDownload, tyro.conf.subcommand(name="nerfosr")], Annotated[Mill19Download, tyro.conf.subcommand(name="mill19")], + Annotated[EyefulTowerDownload, tyro.conf.subcommand(name="eyefultower")], ] @@ -562,15 +666,7 @@ def main( dataset: DatasetDownload, ): """Script to download existing datasets. - We currently support the following datasets: - - nerfstudio: Growing collection of real-world scenes. Use the `capture_name` argument to specify - which capture to download. - - blender: Blender synthetic scenes realeased with NeRF. - - sitcoms3d: Friends TV show scenes. - - record3d: Record3d dataset. - - dnerf: D-NeRF dataset. - - phototourism: PhotoTourism dataset. Use the `capture_name` argument to specify which capture to download. - - mill19: Mill 19 dataset. Use the `capture_name` argument to specify which capture to download. + We currently support the datasets listed above in the Commands. Args: dataset: The dataset to download (from). 
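For context before the packaging changes below, here is a minimal sketch of driving the `eyefultower` subcommand this patch adds. It assumes the `ns-download-data` entry point that wraps this script and the `EyefulTowerDownload` dataclass from the diff above; the capture and resolution values are illustrative, not required defaults.

```python
# Roughly equivalent to the CLI invocation:
#   ns-download-data eyefultower --capture-name riverview --resolution-name jpeg_2k
# Assumes EyefulTowerDownload as defined in this patch.
from pathlib import Path

from nerfstudio.scripts.downloads.download_data import EyefulTowerDownload

downloader = EyefulTowerDownload(
    capture_name=("riverview",),   # any entry from eyefultower_downloads
    resolution_name=("jpeg_2k",),  # any key of eyefultower_resolutions
)
# Internally this shells out to `aws s3 sync --no-sign-request` through
# awscli's clidriver, so no AWS credentials are needed.
downloader.download(Path("data"))
```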
diff --git a/pyproject.toml b/pyproject.toml index cb432a1a6b..85611102a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ classifiers = [ "Programming Language :: Python", ] dependencies = [ + "awscli>=1.31.10", "appdirs>=1.4", "av>=9.2.0", "comet_ml>=3.33.8", @@ -92,7 +93,8 @@ dev = [ "typeguard==2.13.3", "ruff==0.0.267", "sshconf==0.2.5", - "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions + # TODO(1480) enable when pycolmap windows wheels are available + # "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions "diffusers==0.16.1", "opencv-stubs==0.0.7", "transformers==4.29.2", From ba5ce12d4d87710c72db6099712c486c85290f41 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Fri, 8 Dec 2023 14:47:28 -0500 Subject: [PATCH 10/38] wip before I copy linning's stuff in --- nerfstudio/process_data/metashape_utils.py | 2 +- nerfstudio/process_data/process_data_utils.py | 4 ++-- nerfstudio/scripts/process_data.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nerfstudio/process_data/metashape_utils.py b/nerfstudio/process_data/metashape_utils.py index 8bea8707cb..fad2445ed0 100644 --- a/nerfstudio/process_data/metashape_utils.py +++ b/nerfstudio/process_data/metashape_utils.py @@ -17,7 +17,7 @@ import json import xml.etree.ElementTree as ET from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Optional import numpy as np diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index c1946305b3..76239d0205 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -61,12 +61,12 @@ def list_images(data: Path) -> List[Path]: """Lists all supported images in a directory Args: - data: Path to the directory of images. + data: Path to the directory of images. Nested folders are searched as well. 
Returns: Paths to images contained in the directory """ allowed_exts = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + ALLOWED_RAW_EXTS - image_paths = sorted([p for p in data.glob("[!.]*") if p.suffix.lower() in allowed_exts]) + image_paths = sorted([p for p in data.glob("**/[!.]*") if p.suffix.lower() in allowed_exts]) return image_paths diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index 2c2cd7a381..bbe3b635af 100644 --- a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -20,7 +20,7 @@ import zipfile from dataclasses import dataclass from pathlib import Path -from typing import Union +from typing import Optional, Union import numpy as np import tyro @@ -28,11 +28,11 @@ from nerfstudio.process_data import ( metashape_utils, + odm_utils, polycam_utils, process_data_utils, realitycapture_utils, record3d_utils, - odm_utils, ) from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import BaseConverterToNerfstudioDataset from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset From f95b4eb99728fb209f5a128334dc8d525721919f Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Fri, 8 Dec 2023 19:15:29 -0500 Subject: [PATCH 11/38] Generate per-resolution cameras.xml --- nerfstudio/scripts/downloads/download_data.py | 104 ++++++++++++++++-- 1 file changed, 94 insertions(+), 10 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 1be76d6c7b..aff58e97d1 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -15,10 +15,12 @@ """Download datasets and specific captures from the datasets.""" from __future__ import annotations +import copy import json import os import shutil import tarfile +import xml.etree.ElementTree as ET import zipfile from dataclasses import dataclass from pathlib import Path @@ -561,12 +563,20 @@ def download(self, save_dir: Path) -> None: "workshop", ] + +@dataclass +class EyefulTowerResolutionMetadata: + folder_name: str + width: int + height: int + + eyefultower_resolutions = { "all": None, - "jpeg_2k": "images-jpeg-2k", - "jpeg_4k": "images-jpeg-4k", - "jpeg_8k": "images-jpeg", - "exr_2k": "images-2k", + "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048), + "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096), + "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660), + "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048), } if TYPE_CHECKING: @@ -588,9 +598,62 @@ class EyefulTowerDownload(DatasetDownload): capture_name: Tuple[EyefulTowerCaptureName, ...] = () resolution_name: Tuple[EyefulTowerResolution, ...] 
= () + @staticmethod + def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int): + transformed = copy.deepcopy(xml_tree) + + root = transformed.getroot() + assert len(root) == 1 + chunk = root[0] + sensors = chunk.find("sensors") + assert sensors is not None + + for sensor in sensors: + resolution = sensor.find("resolution") + assert resolution is not None, "Resolution not found in EyefulTower camera.xml" + original_width = int(resolution.get("width")) # type: ignore + original_height = int(resolution.get("height")) # type: ignore + + if original_width > original_height: + target_width, target_height = max(target_width, target_height), min(target_width, target_height) + else: + target_height, target_width = max(target_width, target_height), min(target_width, target_height) + + resolution.set("width", str(target_width)) + resolution.set("height", str(target_height)) + + calib = sensor.find("calibration") + assert calib is not None, "Calibration not found in EyefulTower sensor" + + calib_resolution = calib.find("resolution") + assert calib_resolution is not None + calib_resolution.set("width", str(target_width)) + calib_resolution.set("height", str(target_height)) + + # Compute each scale individually and average for better rounding + x_scale = target_width / original_width + y_scale = target_height / original_height + scale = (x_scale + y_scale) / 2.0 + + f = calib.find("f") + assert f is not None and f.text is not None, "f not found in calib" + f.text = str(float(f.text) * scale) + + cx = calib.find("cx") + assert cx is not None and cx.text is not None, "cx not found in calib" + cx.text = str(float(cx.text) * x_scale) + + cy = calib.find("cy") + assert cy is not None and cy.text is not None, "cy not found in calib" + cy.text = str(float(cy.text) * y_scale) + + # TODO: Maybe update pixel_width / pixel_height / focal_length / layer_index? + + return transformed + def download(self, save_dir: Path): if len(self.capture_name) == 0: - self.capture_name = ("seating_area",) + self.capture_name = ("riverview",) print( f"No capture specified, using {self.capture_name} by default.", "Add `--help` to this command to see all available captures.", @@ -630,23 +693,44 @@ def download(self, save_dir: Path): output_path = save_dir / "eyefultower" / capture includes = [] for resolution in resolutions: - includes.extend(["--include", f"{eyefultower_resolutions[resolution]}/*"]) + includes.extend(["--include", f"{eyefultower_resolutions[resolution].folder_name}/*"]) command = ( ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"] + includes + [base_url, str(output_path)] ) + print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'") print( - f"[Capture {i+1: >2d}/{len(captures)}]:", - f"Downloading resolutions {resolutions} from EyefulTower capture '{capture}'", - f"to '{output_path.resolve()}' with command `aws {' '.join(command)}`", - "...", + f" Downloading resolutions {resolutions}", + f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...", end=" ", flush=True, ) driver.main(command) print("done!") + # After downloading, we'll insert an appropriate cameras.xml file into each directory + # It's quick enough that we can just redo it every time this is called, regardless + # of whether new data is downloaded. + xml_input_path = output_path / "cameras.xml" + if not xml_input_path.exists: + print(" WARNING: cameras.xml not found. 
Scaled cameras.xml will not be generated.") + continue + + tree = ET.parse(output_path / "cameras.xml") + + for resolution in resolutions: + metadata = eyefultower_resolutions[resolution] + xml_output_path = output_path / metadata.folder_name / "cameras.xml" + print( + f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", + end=" ", + flush=True, + ) + scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height) + scaled_tree.write(xml_output_path) + print("done!") + Commands = Union[ Annotated[BlenderDownload, tyro.conf.subcommand(name="blender")], From f811bd244bfdc10d5d5a8c92fbe6f534a87aabc7 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Tue, 12 Dec 2023 16:41:02 -0500 Subject: [PATCH 12/38] Generate transforms.json at download --- nerfstudio/scripts/downloads/download_data.py | 154 +++++++++++++++--- 1 file changed, 135 insertions(+), 19 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index aff58e97d1..2ccda92296 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -28,6 +28,7 @@ import awscli.clidriver import gdown +import numpy as np import torch import tyro from typing_extensions import Annotated @@ -569,14 +570,15 @@ class EyefulTowerResolutionMetadata: folder_name: str width: int height: int + extension: str eyefultower_resolutions = { "all": None, - "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048), - "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096), - "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660), - "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048), + "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"), + "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"), + "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), + "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"), } if TYPE_CHECKING: @@ -651,6 +653,89 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe return transformed + def convert_cameras_to_nerfstudio_transforms( + self, cameras: dict, target_width: int, target_height: int, extension: str + ): + output = {} + + distortion_models = [c["distortionModel"] for c in cameras["KRT"]] + distortion_model = list(set(distortion_models)) + assert len(distortion_model) == 1 + distortion_model = distortion_model[0] + if distortion_model == "RadialAndTangential": + output["camera_model"] = "OPENCV" + elif distortion_model == "Fisheye": + output["camera_model"] = "OPENCV_FISHEYE" + else: + raise NotImplementedError(f"Camera model {distortion_model} not implemented") + + frames = [] + for camera in cameras["KRT"]: + frame = {} + # TODO EXR + frame["file_path"] = camera["cameraId"] + f".{extension}" + + original_width = camera["width"] + original_height = camera["height"] + if original_width > original_height: + target_width, target_height = max(target_width, target_height), min(target_width, target_height) + else: + target_height, target_width = max(target_width, target_height), min(target_width, target_height) + x_scale = target_width / original_width + y_scale = target_height / original_height + + frame["w"] = target_width + frame["h"] = target_height + K = np.array(camera["K"]).T # Data stored as column-major + frame["fl_x"] = K[0][0] * x_scale + frame["fl_y"] = K[1][1] * y_scale + frame["cx"] = 
K[0][2] * x_scale + frame["cy"] = K[1][2] * y_scale + + if distortion_model == "RadialAndTangential": + # pinhole: [k1, k2, p1, p2, k3] + frame["k1"] = camera["distortion"][0] + frame["k2"] = camera["distortion"][1] + frame["k3"] = camera["distortion"][4] + frame["k4"] = 0.0 + frame["p1"] = camera["distortion"][2] + frame["p2"] = camera["distortion"][3] + elif distortion_model == "Fisheye": + # fisheye: [k1, k2, k3, _, _, _, p1, p2] + frame["k1"] = camera["distortion"][0] + frame["k2"] = camera["distortion"][1] + frame["k3"] = camera["distortion"][2] + frame["p1"] = camera["distortion"][6] + frame["p2"] = camera["distortion"][7] + else: + raise NotImplementedError("This shouldn't happen") + + T = np.array(camera["T"]).T # Data stored as column-major + T = np.linalg.inv(T) + T = T[[2, 0, 1, 3], :] + T[:, 1:3] *= -1 + frame["transform_matrix"] = T.tolist() + + frames.append(frame) + + frames = sorted(frames, key=lambda f: f["file_path"]) + + output["frames"] = frames + return output + + def subsample_nerfstudio_transforms(self, transforms: dict, n: int): + target = min(len(transforms["frames"]), n) + indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int) + + frames = [] + for i in indices: + frames.append(transforms["frames"][i]) + + output = copy.deepcopy(transforms) + output["frames"] = frames + + return output + def download(self, save_dir: Path): if len(self.capture_name) == 0: self.capture_name = ("riverview",) @@ -715,21 +800,52 @@ def download(self, save_dir: Path): xml_input_path = output_path / "cameras.xml" if not xml_input_path.exists: print(" WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.") - continue - - tree = ET.parse(output_path / "cameras.xml") - - for resolution in resolutions: - metadata = eyefultower_resolutions[resolution] - xml_output_path = output_path / metadata.folder_name / "cameras.xml" - print( - f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", - end=" ", - flush=True, - ) - scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height) - scaled_tree.write(xml_output_path) - print("done!") + else: + tree = ET.parse(output_path / "cameras.xml") + + for resolution in resolutions: + metadata = eyefultower_resolutions[resolution] + xml_output_path = output_path / metadata.folder_name / "cameras.xml" + print( + f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", + end=" ", + flush=True, + ) + scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height) + scaled_tree.write(xml_output_path) + print("done!") + + json_input_path = output_path / "cameras.json" + if not json_input_path.exists: + print(" WARNING: cameras.json not found. transforms.json will not be generated.") + else: + with open(json_input_path, "r") as f: + cameras = json.load(f) + + for resolution in resolutions: + metadata = eyefultower_resolutions[resolution] + json_output_path = output_path / metadata.folder_name / "transforms.json" + print( + f" Generating transforms.json for '{resolution}' to {json_output_path.resolve()} ... 
", + end=" ", + flush=True, + ) + transforms = self.convert_cameras_to_nerfstudio_transforms( + cameras, metadata.width, metadata.height, metadata.extension + ) + + with open(json_output_path, "w", encoding="utf8") as f: + json.dump(transforms, f, indent=4) + + for count, name in [ + (300, "transforms_300.json"), + (int(len(cameras["KRT"]) // 2), "transforms_half.json"), + ]: + subsampled = self.subsample_nerfstudio_transforms(transforms, count) + with open(json_output_path.with_name(name), "w", encoding="utf8") as f: + json.dump(subsampled, f, indent=4) + + print("done!") Commands = Union[ From 59e1cf67e6ede298874e54aec6902832fd04dc21 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Wed, 10 Jan 2024 15:08:31 -0500 Subject: [PATCH 13/38] Fix a couple of quotes --- nerfstudio/scripts/downloads/download_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 2ccda92296..776f9809ef 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -807,7 +807,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] xml_output_path = output_path / metadata.folder_name / "cameras.xml" print( - f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", + f" Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ", end=" ", flush=True, ) @@ -826,7 +826,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] json_output_path = output_path / metadata.folder_name / "transforms.json" print( - f" Generating transforms.json for '{resolution}' to {json_output_path.resolve()} ... ", + f" Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... 
", end=" ", flush=True, ) From 688c39b750b0c4f7593d10ea0421715a3b5ee32e Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Wed, 10 Jan 2024 16:34:17 -0500 Subject: [PATCH 14/38] Use official EyefulTower splits for train and val --- nerfstudio/scripts/downloads/download_data.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 776f9809ef..1a236b2cbc 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -15,6 +15,7 @@ """Download datasets and specific captures from the datasets.""" from __future__ import annotations +import collections import copy import json import os @@ -654,7 +655,7 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe return transformed def convert_cameras_to_nerfstudio_transforms( - self, cameras: dict, target_width: int, target_height: int, extension: str + self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str ): output = {} @@ -669,11 +670,17 @@ def convert_cameras_to_nerfstudio_transforms( else: raise NotImplementedError(f"Camera model {distortion_model} not implemented") + split_sets = {k: set(v) for k, v in splits.items()} + frames = [] + split_filenames = collections.defaultdict(list) for camera in cameras["KRT"]: frame = {} # TODO EXR frame["file_path"] = camera["cameraId"] + f".{extension}" + for split in split_sets: + if camera["cameraId"] in split_sets[split]: + split_filenames[split].append(frame["file_path"]) original_width = camera["width"] original_height = camera["height"] @@ -721,6 +728,8 @@ def convert_cameras_to_nerfstudio_transforms( frames = sorted(frames, key=lambda f: f["file_path"]) output["frames"] = frames + output["train_filenames"] = split_filenames["train"] + output["val_filenames"] = split_filenames["test"] return output def subsample_nerfstudio_transforms(self, transforms: dict, n: int): @@ -734,6 +743,11 @@ def subsample_nerfstudio_transforms(self, transforms: dict, n: int): output = copy.deepcopy(transforms) output["frames"] = frames + # Remove the unused files from the splits + filenames = {f["file_path"] for f in frames} + for key in ["train_filenames", "val_filenames"]: + output[key] = sorted(list(set(transforms[key]) & filenames)) + return output def download(self, save_dir: Path): @@ -816,12 +830,18 @@ def download(self, save_dir: Path): print("done!") json_input_path = output_path / "cameras.json" + splits_input_path = output_path / "splits.json" if not json_input_path.exists: print(" WARNING: cameras.json not found. transforms.json will not be generated.") + elif not splits_input_path.exists: + print(" WARNING: splits.json not found. 
transforms.json will not be generated.") + else: with open(json_input_path, "r") as f: cameras = json.load(f) + with open(splits_input_path, "r") as f: + splits = json.load(f) + for resolution in resolutions: metadata = eyefultower_resolutions[resolution] json_output_path = output_path / metadata.folder_name / "transforms.json" @@ -831,7 +851,7 @@ def download(self, save_dir: Path): flush=True, ) transforms = self.convert_cameras_to_nerfstudio_transforms( - cameras, metadata.width, metadata.height, metadata.extension + cameras, splits, metadata.width, metadata.height, metadata.extension ) with open(json_output_path, "w", encoding="utf8") as f: From 25b6d58f282fbdf03a5311579841d29d95a0264b Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 11 Jan 2024 14:12:43 -0500 Subject: [PATCH 15/38] Disable projectaria-tools on windows --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 85611102a8..2debde67eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,7 +99,9 @@ dev = [ "opencv-stubs==0.0.7", "transformers==4.29.2", "pyright==1.1.331", - "projectaria_tools[all]>=1.2.0", + # NOTE: Disabling projectaria-tools because it doesn't have prebuilt windows wheels + # Syntax comes from here: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ + "projectaria-tools>=1.3.1; sys_platform != 'win32'", ] # Documentation related packages From 0e7d29e78a01504dd098a5008bd9908f1035935f Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 11 Jan 2024 15:37:20 -0500 Subject: [PATCH 16/38] Fix extra imports --- nerfstudio/process_data/metashape_utils.py | 2 +- nerfstudio/scripts/process_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/process_data/metashape_utils.py b/nerfstudio/process_data/metashape_utils.py index fad2445ed0..8bea8707cb 100644 --- a/nerfstudio/process_data/metashape_utils.py +++ b/nerfstudio/process_data/metashape_utils.py @@ -17,7 +17,7 @@ import json import xml.etree.ElementTree as ET from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List import numpy as np diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index bbe3b635af..6890cd933a 100644 --- a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -20,7 +20,7 @@ import zipfile from dataclasses import dataclass from pathlib import Path -from typing import Optional, Union +from typing import Union import numpy as np import tyro From f811bd244bfdc10d5d5a8c92fbe6f534a87aabc7 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 11 Jan 2024 16:14:59 -0500 Subject: [PATCH 17/38] Add a new nerfacto method tuned for EyefulTower --- nerfstudio/configs/method_configs.py | 64 ++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index 1f1a09edbb..96f4b175f9 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -26,23 +26,16 @@ from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig from nerfstudio.configs.base_config import ViewerConfig from nerfstudio.configs.external_methods import get_external_methods -from nerfstudio.data.datamanagers.base_datamanager import ( - VanillaDataManager, - VanillaDataManagerConfig, -) +from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig +from
nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig -from nerfstudio.data.datamanagers.random_cameras_datamanager import ( - RandomCamerasDataManagerConfig, -) +from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig +from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig -from nerfstudio.data.dataparsers.instant_ngp_dataparser import ( - InstantNGPDataParserConfig, -) +from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig -from nerfstudio.data.dataparsers.phototourism_dataparser import ( - PhototourismDataParserConfig, -) +from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig from nerfstudio.data.datasets.depth_dataset import DepthDataset @@ -62,7 +55,6 @@ from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig from nerfstudio.models.generfacto import GenerfactoModelConfig from nerfstudio.models.instant_ngp import InstantNGPModelConfig -from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig from nerfstudio.models.mipnerf import MipNerfModel from nerfstudio.models.nerfacto import NerfactoModelConfig from nerfstudio.models.neus import NeuSModelConfig @@ -71,13 +63,13 @@ from nerfstudio.models.tensorf import TensoRFModelConfig from nerfstudio.models.vanilla_nerf import NeRFModel, VanillaModelConfig from nerfstudio.pipelines.base_pipeline import VanillaPipelineConfig -from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig from nerfstudio.pipelines.dynamic_batch import DynamicBatchPipelineConfig from nerfstudio.plugins.registry import discover_methods method_configs: Dict[str, TrainerConfig] = {} descriptions = { "nerfacto": "Recommended real-time model tuned for real captures. This model will be continually updated.", + "nerfacto-eyeful-tower": "Variant of nerfacto with settings tuned for EyefulTower dataset scenes.", "depth-nerfacto": "Nerfacto with depth supervision.", "instant-ngp": "Implementation of Instant-NGP. Recommended real-time model for unbounded scenes.", "instant-ngp-bounded": "Implementation of Instant-NGP. 
Recommended for bounded real and synthetic scenes", @@ -218,6 +210,45 @@ vis="viewer", ) +method_configs["nerfacto-eyeful-tower"] = TrainerConfig( + method_name="nerfacto", + steps_per_eval_batch=500, + steps_per_save=2000, + max_num_iterations=100_000, + steps_per_eval_all_images=100_000, + mixed_precision=True, + pipeline=VanillaPipelineConfig( + datamanager=ParallelDataManagerConfig( + dataparser=NerfstudioDataParserConfig(), + train_num_rays_per_batch=12_800, + eval_num_rays_per_batch=4096, + ), + model=NerfactoModelConfig( + eval_num_rays_per_chunk=1 << 15, + camera_optimizer=CameraOptimizerConfig(mode="off"), + max_res=19_912, + log2_hashmap_size=22, + far_plane=100.0, + ), + ), + optimizers={ + "proposal_networks": { + "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), + }, + "fields": { + "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), + }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), + }, + }, + viewer=ViewerConfig(num_rays_per_chunk=1 << 15), + vis="viewer", +) + method_configs["depth-nerfacto"] = TrainerConfig( method_name="depth-nerfacto", steps_per_eval_batch=500, @@ -306,8 +337,7 @@ viewer=ViewerConfig(num_rays_per_chunk=1 << 12), vis="viewer", ) -# -# + method_configs["mipnerf"] = TrainerConfig( method_name="mipnerf", pipeline=VanillaPipelineConfig( From 8fb0a8b65eb263726693dd485e7adf79b8ca6206 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Fri, 12 Jan 2024 15:50:16 -0500 Subject: [PATCH 18/38] Split eyefultower download into a separate file --- nerfstudio/scripts/downloads/download_data.py | 342 +---------------- nerfstudio/scripts/downloads/eyeful_tower.py | 345 ++++++++++++++++++ nerfstudio/scripts/downloads/utils.py | 32 ++ 3 files changed, 380 insertions(+), 339 deletions(-) create mode 100644 nerfstudio/scripts/downloads/eyeful_tower.py create mode 100644 nerfstudio/scripts/downloads/utils.py diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 1a236b2cbc..515ee45a62 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -15,45 +15,27 @@ """Download datasets and specific captures from the datasets.""" from __future__ import annotations -import collections -import copy import json import os import shutil import tarfile -import xml.etree.ElementTree as ET import zipfile from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Tuple, Union +from typing import TYPE_CHECKING, Union -import awscli.clidriver import gdown -import numpy as np import torch import tyro from typing_extensions import Annotated -from nerfstudio.configs.base_config import PrintableConfig from nerfstudio.process_data import process_data_utils +from nerfstudio.scripts.downloads.eyeful_tower import EyefulTowerDownload +from nerfstudio.scripts.downloads.utils import DatasetDownload from nerfstudio.utils import install_checks from nerfstudio.utils.scripts import run_command -@dataclass -class DatasetDownload(PrintableConfig): - """Download a dataset""" - - capture_name = None - - save_dir: Path = Path("data/") - """The directory to save the dataset to""" - - def download(self, save_dir: Path) -> None: - """Download the dataset""" - raise NotImplementedError - - @dataclass 
class BlenderDownload(DatasetDownload): """Download the blender dataset.""" @@ -550,324 +532,6 @@ def download(self, save_dir: Path) -> None: shutil.rmtree(target_path / "val") -eyefultower_downloads = [ - "all", - "apartment", - "kitchen", - "office1a", - "office1b", - "office2", - "office_view1", - "office_view2", - "riverview", - "seating_area", - "table", - "workshop", -] - - -@dataclass -class EyefulTowerResolutionMetadata: - folder_name: str - width: int - height: int - extension: str - - -eyefultower_resolutions = { - "all": None, - "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"), - "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"), - "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), - "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"), -} - -if TYPE_CHECKING: - EyefulTowerCaptureName = str - EyefulTowerResolution = str -else: - EyefulTowerCaptureName = tyro.extras.literal_type_from_choices(eyefultower_downloads) - EyefulTowerResolution = tyro.extras.literal_type_from_choices(eyefultower_resolutions.keys()) - - -@dataclass -class EyefulTowerDownload(DatasetDownload): - """Download the EyefulTower dataset. - - Use the --help flag with the `eyefultower` subcommand to see all available datasets. - Find more information about the dataset at https://github.com/facebookresearch/EyefulTower. - """ - - capture_name: Tuple[EyefulTowerCaptureName, ...] = () - resolution_name: Tuple[EyefulTowerResolution, ...] = () - - @staticmethod - def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int): - transformed = copy.deepcopy(xml_tree) - - root = transformed.getroot() - assert len(root) == 1 - chunk = root[0] - sensors = chunk.find("sensors") - assert sensors is not None - - for sensor in sensors: - resolution = sensor.find("resolution") - assert resolution is not None, "Resolution not found in EyefulTower camera.xml" - original_width = int(resolution.get("width")) # type: ignore - original_height = int(resolution.get("height")) # type: ignore - - if original_width > original_height: - target_width, target_height = max(target_width, target_height), min(target_width, target_height) - else: - target_height, target_width = max(target_width, target_height), min(target_width, target_height) - - resolution.set("width", str(target_width)) - resolution.set("height", str(target_height)) - - calib = sensor.find("calibration") - assert calib is not None, "Calibration not found in EyefulTower sensor" - - calib_resolution = calib.find("resolution") - assert calib_resolution is not None - calib_resolution.set("width", str(target_width)) - calib_resolution.set("height", str(target_height)) - - # Compute each scale individually and average for better rounding - x_scale = target_width / original_width - y_scale = target_height / original_height - scale = (x_scale + y_scale) / 2.0 - - f = calib.find("f") - assert f is not None and f.text is not None, "f not found in calib" - f.text = str(float(f.text) * scale) - - cx = calib.find("cx") - assert cx is not None and cx.text is not None, "cx not found in calib" - cx.text = str(float(cx.text) * x_scale) - - cy = calib.find("cy") - assert cy is not None and cy.text is not None, "cy not found in calib" - cy.text = str(float(cy.text) * y_scale) - - # TODO: Maybe update pixel_width / pixel_height / focal_length / layer_index? 
-
-        return transformed
-
-    def convert_cameras_to_nerfstudio_transforms(
-        self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
-    ):
-        output = {}
-
-        distortion_models = [c["distortionModel"] for c in cameras["KRT"]]
-        distortion_model = list(set(distortion_models))
-        assert len(distortion_model) == 1
-        distortion_model = distortion_model[0]
-        if distortion_model == "RadialAndTangential":
-            output["camera_model"] = "OPENCV"
-        elif distortion_model == "Fisheye":
-            output["camera_model"] = "OPENCV_FISHEYE"
-        else:
-            raise NotImplementedError(f"Camera model {distortion_model} not implemented")
-
-        split_sets = {k: set(v) for k, v in splits.items()}
-
-        frames = []
-        split_filenames = collections.defaultdict(list)
-        for camera in cameras["KRT"]:
-            frame = {}
-            # TODO EXR
-            frame["file_path"] = camera["cameraId"] + f".{extension}"
-            for split in split_sets:
-                if camera["cameraId"] in split_sets[split]:
-                    split_filenames[split].append(frame["file_path"])
-
-            original_width = camera["width"]
-            original_height = camera["height"]
-            if original_width > original_height:
-                target_width, target_height = max(target_width, target_height), min(target_width, target_height)
-            else:
-                target_height, target_width = max(target_width, target_height), min(target_width, target_height)
-            x_scale = target_width / original_width
-            y_scale = target_height / original_height
-
-            frame["w"] = target_width
-            frame["h"] = target_height
-            K = np.array(camera["K"]).T  # Data stored as column-major
-            frame["fl_x"] = K[0][0] * x_scale
-            frame["fl_y"] = K[1][1] * y_scale
-            frame["cx"] = K[0][2] * x_scale
-            frame["cy"] = K[1][2] * y_scale
-
-            if distortion_model == "RadialAndTangential":
-                # pinhole: [k1, k2, p1, p2, k3]
-                frame["k1"] = camera["distortion"][0]
-                frame["k2"] = camera["distortion"][1]
-                frame["k3"] = camera["distortion"][4]
-                frame["k4"] = 0.0
-                frame["p1"] = camera["distortion"][2]
-                frame["p2"] = camera["distortion"][3]
-            elif distortion_model == "Fisheye":
-                # fisheye: [k1, k2, k3, _, _, _, p1, p2]
-                frame["k1"] = camera["distortion"][0]
-                frame["k2"] = camera["distortion"][1]
-                frame["k3"] = camera["distortion"][2]
-                frame["p1"] = camera["distortion"][6]
-                frame["p2"] = camera["distortion"][7]
-            else:
-                raise NotImplementedError("This shouldn't happen")
-
-            T = np.array(camera["T"]).T  # Data stored as column-major
-            T = np.linalg.inv(T)
-            T = T[[2, 0, 1, 3], :]
-            T[:, 1:3] *= -1
-            frame["transform_matrix"] = T.tolist()
-
-            frames.append(frame)
-
-        frames = sorted(frames, key=lambda f: f["file_path"])
-
-        output["frames"] = frames
-        output["train_filenames"] = split_filenames["train"]
-        output["val_filenames"] = split_filenames["test"]
-        return output
-
-    def subsample_nerfstudio_transforms(self, transforms: dict, n: int):
-        target = min(len(transforms["frames"]), n)
-        indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int)
-
-        frames = []
-        for i in indices:
-            frames.append(transforms["frames"][i])
-
-        output = copy.deepcopy(transforms)
-        output["frames"] = frames
-
-        # Remove the unused files from the splits
-        filenames = {f["file_path"] for f in frames}
-        for key in ["train_filenames", "val_filenames"]:
-            output[key] = sorted(list(set(transforms[key]) & filenames))
-
-        return output
-
-    def download(self, save_dir: Path):
-        if len(self.capture_name) == 0:
-            self.capture_name = ("riverview",)
-            print(
-                f"No capture specified, using {self.capture_name} by default.",
-                "Add `--help` to this command to see all available captures.",
-            )
-
-        if len(self.resolution_name) == 0:
-            self.resolution_name = ("jpeg_2k",)
-            print(
-                f"No resolution specified, using {self.resolution_name} by default.",
-                "Add `--help` to this command to see all available resolutions.",
-            )
-
-        captures = set()
-        for capture in self.capture_name:
-            if capture == "all":
-                captures.update([c for c in eyefultower_downloads if c != "all"])
-            else:
-                captures.add(capture)
-        captures = sorted(captures)
-        if len(captures) == 0:
-            print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.")
-
-        resolutions = set()
-        for resolution in self.resolution_name:
-            if resolution == "all":
-                resolutions.update([r for r in eyefultower_resolutions.keys() if r != "all"])
-            else:
-                resolutions.add(resolution)
-        resolutions = sorted(resolutions)
-        if len(resolutions) == 0:
-            print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.")
-
-        driver = awscli.clidriver.create_clidriver()
-
-        for i, capture in enumerate(captures):
-            base_url = f"s3://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15/EyefulTower/{capture}/"
-            output_path = save_dir / "eyefultower" / capture
-            includes = []
-            for resolution in resolutions:
-                includes.extend(["--include", f"{eyefultower_resolutions[resolution].folder_name}/*"])
-            command = (
-                ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"]
-                + includes
-                + [base_url, str(output_path)]
-            )
-            print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'")
-            print(
-                f"  Downloading resolutions {resolutions}",
-                f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...",
-                end=" ",
-                flush=True,
-            )
-            driver.main(command)
-            print("done!")
-
-            # After downloading, we'll insert an appropriate cameras.xml file into each directory
-            # It's quick enough that we can just redo it every time this is called, regardless
-            # of whether new data is downloaded.
-            xml_input_path = output_path / "cameras.xml"
-            if not xml_input_path.exists:
-                print("  WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.")
-            else:
-                tree = ET.parse(output_path / "cameras.xml")
-
-                for resolution in resolutions:
-                    metadata = eyefultower_resolutions[resolution]
-                    xml_output_path = output_path / metadata.folder_name / "cameras.xml"
-                    print(
-                        f"  Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ",
-                        end=" ",
-                        flush=True,
-                    )
-                    scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height)
-                    scaled_tree.write(xml_output_path)
-                    print("done!")
-
-            json_input_path = output_path / "cameras.json"
-            splits_input_path = output_path / "splits.json"
-            if not json_input_path.exists:
-                print("  WARNING: cameras.json not found. transforms.json will not be generated.")
-            elif not splits_input_path.exists:
-                print("  WARNING: splits.json not found. transforms.json will not be generated.")
-            else:
-                with open(json_input_path, "r") as f:
-                    cameras = json.load(f)
-
-                with open(splits_input_path, "r") as f:
-                    splits = json.load(f)
-
-                for resolution in resolutions:
-                    metadata = eyefultower_resolutions[resolution]
-                    json_output_path = output_path / metadata.folder_name / "transforms.json"
-                    print(
-                        f"  Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... ",
-                        end=" ",
-                        flush=True,
-                    )
-                    transforms = self.convert_cameras_to_nerfstudio_transforms(
-                        cameras, splits, metadata.width, metadata.height, metadata.extension
-                    )
-
-                    with open(json_output_path, "w", encoding="utf8") as f:
-                        json.dump(transforms, f, indent=4)
-
-                    for count, name in [
-                        (300, "transforms_300.json"),
-                        (int(len(cameras["KRT"]) // 2), "transforms_half.json"),
-                    ]:
-                        subsampled = self.subsample_nerfstudio_transforms(transforms, count)
-                        with open(json_output_path.with_name(name), "w", encoding="utf8") as f:
-                            json.dump(subsampled, f, indent=4)
-
-                    print("done!")
-
-
 Commands = Union[
     Annotated[BlenderDownload, tyro.conf.subcommand(name="blender")],
     Annotated[Sitcoms3DDownload, tyro.conf.subcommand(name="sitcoms3d")],
diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
new file mode 100644
index 0000000000..46353b4c9a
--- /dev/null
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -0,0 +1,345 @@
+# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import collections
+import copy
+import json
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Tuple
+
+import awscli.clidriver
+import numpy as np
+import tyro
+
+from nerfstudio.scripts.downloads.utils import DatasetDownload
+
+eyefultower_downloads = [
+    "all",
+    "apartment",
+    "kitchen",
+    "office1a",
+    "office1b",
+    "office2",
+    "office_view1",
+    "office_view2",
+    "riverview",
+    "seating_area",
+    "table",
+    "workshop",
+]
+
+
+@dataclass
+class EyefulTowerResolutionMetadata:
+    folder_name: str
+    width: int
+    height: int
+    extension: str
+
+
+eyefultower_resolutions = {
+    "all": None,
+    "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"),
+    "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"),
+    "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"),
+    "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"),
+}
+
+if TYPE_CHECKING:
+    EyefulTowerCaptureName = str
+    EyefulTowerResolution = str
+else:
+    EyefulTowerCaptureName = tyro.extras.literal_type_from_choices(eyefultower_downloads)
+    EyefulTowerResolution = tyro.extras.literal_type_from_choices(eyefultower_resolutions.keys())
+
+
+@dataclass
+class EyefulTowerDownload(DatasetDownload):
+    """Download the EyefulTower dataset.
+
+    Use the --help flag with the `eyefultower` subcommand to see all available datasets.
+    Find more information about the dataset at https://github.com/facebookresearch/EyefulTower.
+    """
+
+    capture_name: Tuple[EyefulTowerCaptureName, ...] = ()
+    resolution_name: Tuple[EyefulTowerResolution, ...] = ()
+
+    @staticmethod
+    def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int):
+        transformed = copy.deepcopy(xml_tree)
+
+        root = transformed.getroot()
+        assert len(root) == 1
+        chunk = root[0]
+        sensors = chunk.find("sensors")
+        assert sensors is not None
+
+        for sensor in sensors:
+            resolution = sensor.find("resolution")
+            assert resolution is not None, "Resolution not found in EyefulTower camera.xml"
+            original_width = int(resolution.get("width"))  # type: ignore
+            original_height = int(resolution.get("height"))  # type: ignore
+
+            if original_width > original_height:
+                target_width, target_height = max(target_width, target_height), min(target_width, target_height)
+            else:
+                target_height, target_width = max(target_width, target_height), min(target_width, target_height)
+
+            resolution.set("width", str(target_width))
+            resolution.set("height", str(target_height))
+
+            calib = sensor.find("calibration")
+            assert calib is not None, "Calibration not found in EyefulTower sensor"
+
+            calib_resolution = calib.find("resolution")
+            assert calib_resolution is not None
+            calib_resolution.set("width", str(target_width))
+            calib_resolution.set("height", str(target_height))
+
+            # Compute each scale individually and average for better rounding
+            x_scale = target_width / original_width
+            y_scale = target_height / original_height
+            scale = (x_scale + y_scale) / 2.0
+
+            f = calib.find("f")
+            assert f is not None and f.text is not None, "f not found in calib"
+            f.text = str(float(f.text) * scale)
+
+            cx = calib.find("cx")
+            assert cx is not None and cx.text is not None, "cx not found in calib"
+            cx.text = str(float(cx.text) * x_scale)
+
+            cy = calib.find("cy")
+            assert cy is not None and cy.text is not None, "cy not found in calib"
+            cy.text = str(float(cy.text) * y_scale)
+
+            # TODO: Maybe update pixel_width / pixel_height / focal_length / layer_index?
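+            # Worked example (values from the resolution table in this file): resizing
+            # the 8k JPEGs (5784x8660) down to the 2k target (1368x2048) gives
+            # x_scale = 1368/5784 and y_scale = 2048/8660 (both roughly 0.2365);
+            # f is scaled by their average, while cx and cy use their own per-axis
+            # scale so the principal point stays aligned after the resize.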
+
+        return transformed
+
+    def convert_cameras_to_nerfstudio_transforms(
+        self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
+    ):
+        output = {}
+
+        distortion_models = [c["distortionModel"] for c in cameras["KRT"]]
+        distortion_model = list(set(distortion_models))
+        assert len(distortion_model) == 1
+        distortion_model = distortion_model[0]
+        if distortion_model == "RadialAndTangential":
+            output["camera_model"] = "OPENCV"
+        elif distortion_model == "Fisheye":
+            output["camera_model"] = "OPENCV_FISHEYE"
+        else:
+            raise NotImplementedError(f"Camera model {distortion_model} not implemented")
+
+        split_sets = {k: set(v) for k, v in splits.items()}
+
+        frames = []
+        split_filenames = collections.defaultdict(list)
+        for camera in cameras["KRT"]:
+            frame = {}
+            # TODO EXR
+            frame["file_path"] = camera["cameraId"] + f".{extension}"
+            for split in split_sets:
+                if camera["cameraId"] in split_sets[split]:
+                    split_filenames[split].append(frame["file_path"])
+
+            original_width = camera["width"]
+            original_height = camera["height"]
+            if original_width > original_height:
+                target_width, target_height = max(target_width, target_height), min(target_width, target_height)
+            else:
+                target_height, target_width = max(target_width, target_height), min(target_width, target_height)
+            x_scale = target_width / original_width
+            y_scale = target_height / original_height
+
+            frame["w"] = target_width
+            frame["h"] = target_height
+            K = np.array(camera["K"]).T  # Data stored as column-major
+            frame["fl_x"] = K[0][0] * x_scale
+            frame["fl_y"] = K[1][1] * y_scale
+            frame["cx"] = K[0][2] * x_scale
+            frame["cy"] = K[1][2] * y_scale
+
+            if distortion_model == "RadialAndTangential":
+                # pinhole: [k1, k2, p1, p2, k3]
+                frame["k1"] = camera["distortion"][0]
+                frame["k2"] = camera["distortion"][1]
+                frame["k3"] = camera["distortion"][4]
+                frame["k4"] = 0.0
+                frame["p1"] = camera["distortion"][2]
+                frame["p2"] = camera["distortion"][3]
+            elif distortion_model == "Fisheye":
+                # fisheye: [k1, k2, k3, _, _, _, p1, p2]
+                frame["k1"] = camera["distortion"][0]
+                frame["k2"] = camera["distortion"][1]
+                frame["k3"] = camera["distortion"][2]
+                frame["p1"] = camera["distortion"][6]
+                frame["p2"] = camera["distortion"][7]
+            else:
+                raise NotImplementedError("This shouldn't happen")
+
+            T = np.array(camera["T"]).T  # Data stored as column-major
+            T = np.linalg.inv(T)
+            T = T[[2, 0, 1, 3], :]
+            T[:, 1:3] *= -1
+            frame["transform_matrix"] = T.tolist()
+
+            frames.append(frame)
+
+        frames = sorted(frames, key=lambda f: f["file_path"])
+
+        output["frames"] = frames
+        output["train_filenames"] = split_filenames["train"]
+        output["val_filenames"] = split_filenames["test"]
+        return output
+
+    def subsample_nerfstudio_transforms(self, transforms: dict, n: int):
+        target = min(len(transforms["frames"]), n)
+        indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int)
+
+        frames = []
+        for i in indices:
+            frames.append(transforms["frames"][i])
+
+        output = copy.deepcopy(transforms)
+        output["frames"] = frames
+
+        # Remove the unused files from the splits
+        filenames = {f["file_path"] for f in frames}
+        for key in ["train_filenames", "val_filenames"]:
+            output[key] = sorted(list(set(transforms[key]) & filenames))
+
+        return output
+
+    def download(self, save_dir: Path):
+        if len(self.capture_name) == 0:
+            self.capture_name = ("riverview",)
+            print(
+                f"No capture specified, using {self.capture_name} by default.",
+                "Add `--help` to this command to see all available captures.",
+            )
+
+        if len(self.resolution_name) == 0:
+            self.resolution_name = ("jpeg_2k",)
+            print(
+                f"No resolution specified, using {self.resolution_name} by default.",
+                "Add `--help` to this command to see all available resolutions.",
+            )
+
+        captures = set()
+        for capture in self.capture_name:
+            if capture == "all":
+                captures.update([c for c in eyefultower_downloads if c != "all"])
+            else:
+                captures.add(capture)
+        captures = sorted(captures)
+        if len(captures) == 0:
+            print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.")
+
+        resolutions = set()
+        for resolution in self.resolution_name:
+            if resolution == "all":
+                resolutions.update([r for r in eyefultower_resolutions.keys() if r != "all"])
+            else:
+                resolutions.add(resolution)
+        resolutions = sorted(resolutions)
+        if len(resolutions) == 0:
+            print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.")
+
+        driver = awscli.clidriver.create_clidriver()
+
+        for i, capture in enumerate(captures):
+            base_url = f"s3://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15/EyefulTower/{capture}/"
+            output_path = save_dir / "eyefultower" / capture
+            includes = []
+            for resolution in resolutions:
+                includes.extend(["--include", f"{eyefultower_resolutions[resolution].folder_name}/*"])
+            command = (
+                ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"]
+                + includes
+                + [base_url, str(output_path)]
+            )
+            print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'")
+            print(
+                f"  Downloading resolutions {resolutions}",
+                f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...",
+                end=" ",
+                flush=True,
+            )
+            driver.main(command)
+            print("done!")
+
+            # After downloading, we'll insert an appropriate cameras.xml file into each directory
+            # It's quick enough that we can just redo it every time this is called, regardless
+            # of whether new data is downloaded.
+            xml_input_path = output_path / "cameras.xml"
+            if not xml_input_path.exists():
+                print("  WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.")
+            else:
+                tree = ET.parse(output_path / "cameras.xml")
+
+                for resolution in resolutions:
+                    metadata = eyefultower_resolutions[resolution]
+                    xml_output_path = output_path / metadata.folder_name / "cameras.xml"
+                    print(
+                        f"  Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ",
+                        end=" ",
+                        flush=True,
+                    )
+                    scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height)
+                    scaled_tree.write(xml_output_path)
+                    print("done!")
+
+            json_input_path = output_path / "cameras.json"
+            splits_input_path = output_path / "splits.json"
+            if not json_input_path.exists():
+                print("  WARNING: cameras.json not found. transforms.json will not be generated.")
+            elif not splits_input_path.exists():
+                print("  WARNING: splits.json not found. transforms.json will not be generated.")
+            else:
+                with open(json_input_path, "r") as f:
+                    cameras = json.load(f)
+
+                with open(splits_input_path, "r") as f:
+                    splits = json.load(f)
+
+                for resolution in resolutions:
+                    metadata = eyefultower_resolutions[resolution]
+                    json_output_path = output_path / metadata.folder_name / "transforms.json"
+                    print(
+                        f"  Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... ",
", + end=" ", + flush=True, + ) + transforms = self.convert_cameras_to_nerfstudio_transforms( + cameras, splits, metadata.width, metadata.height, metadata.extension + ) + + with open(json_output_path, "w", encoding="utf8") as f: + json.dump(transforms, f, indent=4) + + for count, name in [ + (300, "transforms_300.json"), + (int(len(cameras["KRT"]) // 2), "transforms_half.json"), + ]: + subsampled = self.subsample_nerfstudio_transforms(transforms, count) + with open(json_output_path.with_name(name), "w", encoding="utf8") as f: + json.dump(subsampled, f, indent=4) + + print("done!") diff --git a/nerfstudio/scripts/downloads/utils.py b/nerfstudio/scripts/downloads/utils.py new file mode 100644 index 0000000000..72054edb03 --- /dev/null +++ b/nerfstudio/scripts/downloads/utils.py @@ -0,0 +1,32 @@ +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from pathlib import Path + +from nerfstudio.configs.base_config import PrintableConfig + + +@dataclass +class DatasetDownload(PrintableConfig): + """Download a dataset""" + + capture_name = None + + save_dir: Path = Path("data/") + """The directory to save the dataset to""" + + def download(self, save_dir: Path) -> None: + """Download the dataset""" + raise NotImplementedError From c481c29f256bb3e981bb4333516a2cb9e300cd55 Mon Sep 17 00:00:00 2001 From: xucr Date: Sat, 13 Jan 2024 20:17:09 -0800 Subject: [PATCH 19/38] Fix typo --- nerfstudio/data/datamanagers/full_images_datamanager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 99bba3f3da..61296e010a 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -58,12 +58,11 @@ class FullImageDatamanagerConfig(DataManagerConfig): """When not evaluating on all images, number of iterations before picking new images. If -1, never pick new images.""" eval_image_indices: Optional[Tuple[int, ...]] = (0,) - """Specifies the image indices to use during eval; if None, uses all."" + """Specifies the image indices to use during eval; if None, uses all.""" cache_images: Literal["cpu", "gpu"] = "cpu" """Whether to cache images in memory. If "cpu", caches on cpu. If "gpu", caches on device.""" - class FullImageDatamanager(DataManager, Generic[TDataset]): """ A datamanager that outputs full images and cameras instead of raybundles. 
This makes the From ac5200af82a4709298d8f5713861100b52135f76 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Tue, 16 Jan 2024 17:23:43 -0500 Subject: [PATCH 20/38] Add some fisheye support for eyeful data --- nerfstudio/configs/method_configs.py | 52 ++++++++++++------- .../data/datamanagers/base_datamanager.py | 35 +++++-------- .../data/datamanagers/parallel_datamanager.py | 22 +++++--- .../data/dataparsers/nerfstudio_dataparser.py | 21 +++++--- nerfstudio/scripts/downloads/eyeful_tower.py | 14 ++++- nerfstudio/utils/tensor_dataclass.py | 6 ++- 6 files changed, 89 insertions(+), 61 deletions(-) diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index 96f4b175f9..4d3ea835bd 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -26,30 +26,42 @@ from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig from nerfstudio.configs.base_config import ViewerConfig from nerfstudio.configs.external_methods import get_external_methods -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig -from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig -from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig -from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig -from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig -from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig +from nerfstudio.data.datamanagers.base_datamanager import ( + VanillaDataManager, VanillaDataManagerConfig) +from nerfstudio.data.datamanagers.full_images_datamanager import \ + FullImageDatamanagerConfig +from nerfstudio.data.datamanagers.parallel_datamanager import \ + ParallelDataManagerConfig +from nerfstudio.data.datamanagers.random_cameras_datamanager import \ + RandomCamerasDataManagerConfig +from nerfstudio.data.dataparsers.blender_dataparser import \ + BlenderDataParserConfig +from nerfstudio.data.dataparsers.colmap_dataparser import \ + ColmapDataParserConfig from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig -from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig -from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig -from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig -from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig -from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig +from nerfstudio.data.dataparsers.instant_ngp_dataparser import \ + InstantNGPDataParserConfig +from nerfstudio.data.dataparsers.nerfstudio_dataparser import \ + NerfstudioDataParserConfig +from nerfstudio.data.dataparsers.phototourism_dataparser import \ + PhototourismDataParserConfig +from nerfstudio.data.dataparsers.sdfstudio_dataparser import \ + SDFStudioDataParserConfig +from nerfstudio.data.dataparsers.sitcoms3d_dataparser import \ + Sitcoms3DDataParserConfig from nerfstudio.data.datasets.depth_dataset import DepthDataset from nerfstudio.data.datasets.sdf_dataset import SDFDataset from nerfstudio.data.datasets.semantic_dataset import SemanticDataset -from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig -from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig -from nerfstudio.engine.schedulers import ( - 
CosineDecaySchedulerConfig, - ExponentialDecaySchedulerConfig, - MultiStepSchedulerConfig, -) +from nerfstudio.data.pixel_samplers import (PairPixelSamplerConfig, + PixelSamplerConfig) +from nerfstudio.engine.optimizers import (AdamOptimizerConfig, + RAdamOptimizerConfig) +from nerfstudio.engine.schedulers import (CosineDecaySchedulerConfig, + ExponentialDecaySchedulerConfig, + MultiStepSchedulerConfig) from nerfstudio.engine.trainer import TrainerConfig -from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind +from nerfstudio.field_components.temporal_distortions import \ + TemporalDistortionKind from nerfstudio.fields.sdf_field import SDFFieldConfig from nerfstudio.models.depth_nerfacto import DepthNerfactoModelConfig from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig @@ -246,7 +258,7 @@ }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), - vis="viewer", + vis="viewer+tensorboard", ) method_configs["depth-nerfacto"] = TrainerConfig( diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 2131d5260b..66363c060e 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -23,22 +23,8 @@ from dataclasses import dataclass, field from functools import cached_property from pathlib import Path -from typing import ( - Any, - Callable, - Dict, - ForwardRef, - Generic, - List, - Literal, - Optional, - Tuple, - Type, - Union, - cast, - get_args, - get_origin, -) +from typing import (Any, Callable, Dict, ForwardRef, Generic, List, Literal, + Optional, Tuple, Type, Union, cast, get_args, get_origin) import torch from torch import nn @@ -52,12 +38,17 @@ from nerfstudio.configs.base_config import InstantiateConfig from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs -from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig +from nerfstudio.data.dataparsers.blender_dataparser import \ + BlenderDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset -from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig -from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader +from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig, + PixelSampler, PixelSamplerConfig) +from nerfstudio.data.utils.dataloaders import (CacheDataloader, + FixedIndicesEvalDataloader, + RandIndicesEvalDataloader) from nerfstudio.data.utils.nerfstudio_collate import nerfstudio_collate -from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes +from nerfstudio.engine.callbacks import (TrainingCallback, + TrainingCallbackAttributes) from nerfstudio.model_components.ray_generators import RayGenerator from nerfstudio.utils.misc import IterableWrapper, get_orig_class from nerfstudio.utils.rich_utils import CONSOLE @@ -468,8 +459,8 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.") fisheye_crop_radius = None - if dataset.cameras.metadata is not None and "fisheye_crop_radius" in dataset.cameras.metadata: - fisheye_crop_radius = dataset.cameras.metadata["fisheye_crop_radius"] + if dataset.cameras.metadata is not None: + fisheye_crop_radius = 
dataset.cameras.metadata.get("fisheye_crop_radius") return self.config.pixel_sampler.setup( is_equirectangular=is_equirectangular, diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py index 9f36807a61..eb704ae3fd 100644 --- a/nerfstudio/data/datamanagers/parallel_datamanager.py +++ b/nerfstudio/data/datamanagers/parallel_datamanager.py @@ -32,14 +32,13 @@ from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.cameras.rays import RayBundle from nerfstudio.data.datamanagers.base_datamanager import ( - DataManager, - TDataset, - VanillaDataManagerConfig, - variable_res_collate, -) + DataManager, TDataset, VanillaDataManagerConfig, variable_res_collate) from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs -from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig -from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader +from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig, + PixelSampler, PixelSamplerConfig) +from nerfstudio.data.utils.dataloaders import (CacheDataloader, + FixedIndicesEvalDataloader, + RandIndicesEvalDataloader) from nerfstudio.model_components.ray_generators import RayGenerator from nerfstudio.utils.rich_utils import CONSOLE @@ -198,8 +197,15 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe is_equirectangular = (dataset.cameras.camera_type == CameraType.EQUIRECTANGULAR.value).all() if is_equirectangular.any(): CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.") + + fisheye_crop_radius = None + if dataset.cameras.metadata is not None: + fisheye_crop_radius = dataset.cameras.metadata.get("fisheye_crop_radius") + return self.config.pixel_sampler.setup( - is_equirectangular=is_equirectangular, num_rays_per_batch=num_rays_per_batch + is_equirectangular=is_equirectangular, + num_rays_per_batch=num_rays_per_batch, + fisheye_crop_radius=fisheye_crop_radius, ) def setup_train(self): diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index aaa88c7691..4cca0e9304 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -25,15 +25,15 @@ from PIL import Image from nerfstudio.cameras import camera_utils -from nerfstudio.cameras.cameras import CAMERA_MODEL_TO_TYPE, Cameras, CameraType -from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs +from nerfstudio.cameras.cameras import (CAMERA_MODEL_TO_TYPE, Cameras, + CameraType) +from nerfstudio.data.dataparsers.base_dataparser import (DataParser, + DataParserConfig, + DataparserOutputs) from nerfstudio.data.scene_box import SceneBox from nerfstudio.data.utils.dataparsers_utils import ( - get_train_eval_split_all, - get_train_eval_split_filename, - get_train_eval_split_fraction, - get_train_eval_split_interval, -) + get_train_eval_split_all, get_train_eval_split_filename, + get_train_eval_split_fraction, get_train_eval_split_interval) from nerfstudio.utils.io import load_from_json from nerfstudio.utils.rich_utils import CONSOLE @@ -289,7 +289,12 @@ def _generate_dataparser_outputs(self, split="train"): else: distortion_params = torch.stack(distort, dim=0)[idx_tensor] - metadata = {"fisheye_crop_radius": fisheye_crop_radius} if fisheye_crop_radius 
+        # Only add fisheye crop radius parameter if the images are actually fisheye, to allow the same config to be used
+        # for both fisheye and non-fisheye datasets.
+        metadata = {}
+        if (camera_type in [CameraType.FISHEYE, CameraType.FISHEYE624]) and (fisheye_crop_radius is not None):
+            metadata['fisheye_crop_radius'] = fisheye_crop_radius
+
         cameras = Cameras(
             fx=fx,
             fy=fy,
diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
index 46353b4c9a..afc7486c0b 100644
--- a/nerfstudio/scripts/downloads/eyeful_tower.py
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -42,6 +42,15 @@
     "workshop",
 ]
 
+# Crop radii empirically chosen to try to avoid hitting the rig base or go out of bounds
+eyefultower_fisheye_radii = {
+    "office1a": 0.43,
+    "office2": 0.45,
+    "seating_area": 0.375, # could be .45 except for camera 2
+    "table": 0.45,
+    "workshop": 0.45,
+}
+
 
 @dataclass
 class EyefulTowerResolutionMetadata:
@@ -132,7 +141,7 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe
         return transformed
 
     def convert_cameras_to_nerfstudio_transforms(
-        self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
+        self, capture_name: str, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
     ):
         output = {}
 
@@ -144,6 +153,7 @@ def convert_cameras_to_nerfstudio_transforms(
             output["camera_model"] = "OPENCV"
         elif distortion_model == "Fisheye":
             output["camera_model"] = "OPENCV_FISHEYE"
+            output["fisheye_crop_radius"] = eyefultower_fisheye_radii[capture_name]
         else:
             raise NotImplementedError(f"Camera model {distortion_model} not implemented")
 
@@ -328,7 +338,7 @@ def download(self, save_dir: Path):
                         flush=True,
                     )
                     transforms = self.convert_cameras_to_nerfstudio_transforms(
-                        cameras, splits, metadata.width, metadata.height, metadata.extension
+                        capture, cameras, splits, metadata.width, metadata.height, metadata.extension
                     )
 
                     with open(json_output_path, "w", encoding="utf8") as f:
                         json.dump(transforms, f, indent=4)
diff --git a/nerfstudio/utils/tensor_dataclass.py b/nerfstudio/utils/tensor_dataclass.py
index a2b8d1dadb..6c77eef6fe 100644
--- a/nerfstudio/utils/tensor_dataclass.py
+++ b/nerfstudio/utils/tensor_dataclass.py
@@ -16,7 +16,8 @@
 
 import dataclasses
 from copy import deepcopy
-from typing import Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union
+from typing import (Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar,
+                    Union)
 
 import numpy as np
 import torch
@@ -141,6 +142,9 @@ def _broadcast_dict_fields(self, dict_: Dict, batch_shape) -> Dict:
                 new_dict[k] = v.broadcast_to(batch_shape)
             elif isinstance(v, Dict):
                 new_dict[k] = self._broadcast_dict_fields(v, batch_shape)
+            else:
+                # Don't broadcast the remaining fields
+                new_dict[k] = v
         return new_dict
 
     def __getitem__(self: TensorDataclassT, indices) -> TensorDataclassT:

From 6998760c75e71d7aefd6c51fc4a5b5d9161c69ba Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Tue, 16 Jan 2024 17:50:26 -0500
Subject: [PATCH 21/38] Reformatted imports to not be dumb

---
 nerfstudio/configs/method_configs.py          | 50 +++++++------------
 .../data/datamanagers/base_datamanager.py     | 31 ++++++++----
 .../data/datamanagers/parallel_datamanager.py | 13 ++---
 .../data/dataparsers/nerfstudio_dataparser.py | 16 +++---
 nerfstudio/utils/tensor_dataclass.py          |  3 +-
 5 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
index 4d3ea835bd..f0dd59c652 100644
--- a/nerfstudio/configs/method_configs.py
+++ b/nerfstudio/configs/method_configs.py
@@ -26,42 +26,30 @@
 from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
 from nerfstudio.configs.base_config import ViewerConfig
 from nerfstudio.configs.external_methods import get_external_methods
-from nerfstudio.data.datamanagers.base_datamanager import (
-    VanillaDataManager, VanillaDataManagerConfig)
-from nerfstudio.data.datamanagers.full_images_datamanager import \
-    FullImageDatamanagerConfig
-from nerfstudio.data.datamanagers.parallel_datamanager import \
-    ParallelDataManagerConfig
-from nerfstudio.data.datamanagers.random_cameras_datamanager import \
-    RandomCamerasDataManagerConfig
-from nerfstudio.data.dataparsers.blender_dataparser import \
-    BlenderDataParserConfig
-from nerfstudio.data.dataparsers.colmap_dataparser import \
-    ColmapDataParserConfig
+from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig
+from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig
+from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig
+from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig
+from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
+from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig
 from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig
-from nerfstudio.data.dataparsers.instant_ngp_dataparser import \
-    InstantNGPDataParserConfig
-from nerfstudio.data.dataparsers.nerfstudio_dataparser import \
-    NerfstudioDataParserConfig
-from nerfstudio.data.dataparsers.phototourism_dataparser import \
-    PhototourismDataParserConfig
-from nerfstudio.data.dataparsers.sdfstudio_dataparser import \
-    SDFStudioDataParserConfig
-from nerfstudio.data.dataparsers.sitcoms3d_dataparser import \
-    Sitcoms3DDataParserConfig
+from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig
+from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig
+from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig
+from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig
+from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig
 from nerfstudio.data.datasets.depth_dataset import DepthDataset
 from nerfstudio.data.datasets.sdf_dataset import SDFDataset
 from nerfstudio.data.datasets.semantic_dataset import SemanticDataset
-from nerfstudio.data.pixel_samplers import (PairPixelSamplerConfig,
-                                            PixelSamplerConfig)
-from nerfstudio.engine.optimizers import (AdamOptimizerConfig,
-                                          RAdamOptimizerConfig)
-from nerfstudio.engine.schedulers import (CosineDecaySchedulerConfig,
-                                          ExponentialDecaySchedulerConfig,
-                                          MultiStepSchedulerConfig)
+from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig
+from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig
+from nerfstudio.engine.schedulers import (
+    CosineDecaySchedulerConfig,
+    ExponentialDecaySchedulerConfig,
+    MultiStepSchedulerConfig,
+)
 from nerfstudio.engine.trainer import TrainerConfig
-from nerfstudio.field_components.temporal_distortions import \
-    TemporalDistortionKind
+from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind
 from nerfstudio.fields.sdf_field import SDFFieldConfig
 from nerfstudio.models.depth_nerfacto import DepthNerfactoModelConfig
 from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index 66363c060e..d97ddd2a30 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -23,8 +23,22 @@
 from dataclasses import dataclass, field
 from functools import cached_property
 from pathlib import Path
-from typing import (Any, Callable, Dict, ForwardRef, Generic, List, Literal,
-                    Optional, Tuple, Type, Union, cast, get_args, get_origin)
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    ForwardRef,
+    Generic,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    cast,
+    get_args,
+    get_origin,
+)
 
 import torch
 from torch import nn
@@ -52,12 +38,17 @@
 from nerfstudio.configs.base_config import InstantiateConfig
 from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion
 from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
-from nerfstudio.data.dataparsers.blender_dataparser import \
-    BlenderDataParserConfig
+from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
 from nerfstudio.data.datasets.base_dataset import InputDataset
-from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig,
-                                            PixelSampler, PixelSamplerConfig)
-from nerfstudio.data.utils.dataloaders import (CacheDataloader,
-                                               FixedIndicesEvalDataloader,
-                                               RandIndicesEvalDataloader)
+from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig
+from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader
 from nerfstudio.data.utils.nerfstudio_collate import nerfstudio_collate
-from nerfstudio.engine.callbacks import (TrainingCallback,
-                                         TrainingCallbackAttributes)
+from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
 from nerfstudio.model_components.ray_generators import RayGenerator
 from nerfstudio.utils.misc import IterableWrapper, get_orig_class
 from nerfstudio.utils.rich_utils import CONSOLE
diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py
index eb704ae3fd..bd66d01db8 100644
--- a/nerfstudio/data/datamanagers/parallel_datamanager.py
+++ b/nerfstudio/data/datamanagers/parallel_datamanager.py
@@ -32,13 +32,14 @@
 from nerfstudio.cameras.cameras import Cameras, CameraType
 from nerfstudio.cameras.rays import RayBundle
 from nerfstudio.data.datamanagers.base_datamanager import (
-    DataManager, TDataset, VanillaDataManagerConfig, variable_res_collate)
+    DataManager,
+    TDataset,
+    VanillaDataManagerConfig,
+    variable_res_collate,
+)
 from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
-from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig,
-                                            PixelSampler, PixelSamplerConfig)
-from nerfstudio.data.utils.dataloaders import (CacheDataloader,
-                                               FixedIndicesEvalDataloader,
-                                               RandIndicesEvalDataloader)
+from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig
+from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader
 from nerfstudio.model_components.ray_generators import RayGenerator
 from nerfstudio.utils.rich_utils import CONSOLE
 
diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
index 4cca0e9304..e545bae781 100644
--- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
+++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
@@ -25,15 +25,15 @@
 from PIL import Image
 
 from nerfstudio.cameras import camera_utils
-from nerfstudio.cameras.cameras import (CAMERA_MODEL_TO_TYPE, Cameras,
-                                        CameraType)
-from nerfstudio.data.dataparsers.base_dataparser import (DataParser,
-                                                         DataParserConfig,
-                                                         DataparserOutputs)
+from nerfstudio.cameras.cameras import CAMERA_MODEL_TO_TYPE, Cameras, CameraType
+from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs
 from nerfstudio.data.scene_box import SceneBox
 from nerfstudio.data.utils.dataparsers_utils import (
-    get_train_eval_split_all, get_train_eval_split_filename,
-    get_train_eval_split_fraction, get_train_eval_split_interval)
+    get_train_eval_split_all,
+    get_train_eval_split_filename,
+    get_train_eval_split_fraction,
+    get_train_eval_split_interval,
+)
 from nerfstudio.utils.io import load_from_json
 from nerfstudio.utils.rich_utils import CONSOLE
 
@@ -293,7 +293,7 @@ def _generate_dataparser_outputs(self, split="train"):
         # for both fisheye and non-fisheye datasets.
         metadata = {}
         if (camera_type in [CameraType.FISHEYE, CameraType.FISHEYE624]) and (fisheye_crop_radius is not None):
-            metadata['fisheye_crop_radius'] = fisheye_crop_radius
+            metadata["fisheye_crop_radius"] = fisheye_crop_radius
 
         cameras = Cameras(
             fx=fx,
diff --git a/nerfstudio/utils/tensor_dataclass.py b/nerfstudio/utils/tensor_dataclass.py
index 6c77eef6fe..293d978d7e 100644
--- a/nerfstudio/utils/tensor_dataclass.py
+++ b/nerfstudio/utils/tensor_dataclass.py
@@ -16,8 +16,7 @@
 
 import dataclasses
 from copy import deepcopy
-from typing import (Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar,
-                    Union)
+from typing import Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union
 
 import numpy as np
 import torch

From 82d4f5ec67b7e7f0184daa39ba97f3b09319263f Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Tue, 16 Jan 2024 18:18:51 -0500
Subject: [PATCH 22/38] Apparently this file was missed when formatting
 originally

---
 nerfstudio/scripts/downloads/eyeful_tower.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
index afc7486c0b..2d1cf47dc3 100644
--- a/nerfstudio/scripts/downloads/eyeful_tower.py
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -46,7 +46,7 @@
 eyefultower_fisheye_radii = {
     "office1a": 0.43,
     "office2": 0.45,
-    "seating_area": 0.375, # could be .45 except for camera 2
+    "seating_area": 0.375,  # could be .45 except for camera 2
     "table": 0.45,
     "workshop": 0.45,
 }

From bf6d7b74cb14e8f979894a13ae70b89669b2b3b1 Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Thu, 18 Jan 2024 13:51:18 -0500
Subject: [PATCH 23/38] Added 1k resolution scenes

---
 nerfstudio/scripts/downloads/eyeful_tower.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
index 2d1cf47dc3..df49498cd9 100644
--- a/nerfstudio/scripts/downloads/eyeful_tower.py
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -62,6 +62,7 @@ class EyefulTowerResolutionMetadata:
 
 eyefultower_resolutions = {
     "all": None,
+    "jpeg_1k": EyefulTowerResolutionMetadata("images-jpeg-1k", 684, 1024, "jpg"),
     "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"),
     "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"),
"jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), From 71266942dd762789db93801857016fc0a2397a66 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 13:55:17 -0500 Subject: [PATCH 24/38] revert method_configs.py to original values --- nerfstudio/configs/method_configs.py | 43 ++-------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index f0dd59c652..55ba44eb41 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -69,7 +69,6 @@ method_configs: Dict[str, TrainerConfig] = {} descriptions = { "nerfacto": "Recommended real-time model tuned for real captures. This model will be continually updated.", - "nerfacto-eyeful-tower": "Variant of nerfacto with settings tuned for EyefulTower dataset scenes.", "depth-nerfacto": "Nerfacto with depth supervision.", "instant-ngp": "Implementation of Instant-NGP. Recommended real-time model for unbounded scenes.", "instant-ngp-bounded": "Implementation of Instant-NGP. Recommended for bounded real and synthetic scenes", @@ -210,45 +209,6 @@ vis="viewer", ) -method_configs["nerfacto-eyeful-tower"] = TrainerConfig( - method_name="nerfacto", - steps_per_eval_batch=500, - steps_per_save=2000, - max_num_iterations=100_000, - steps_per_eval_all_images=100_000, - mixed_precision=True, - pipeline=VanillaPipelineConfig( - datamanager=ParallelDataManagerConfig( - dataparser=NerfstudioDataParserConfig(), - train_num_rays_per_batch=12_800, - eval_num_rays_per_batch=4096, - ), - model=NerfactoModelConfig( - eval_num_rays_per_chunk=1 << 15, - camera_optimizer=CameraOptimizerConfig(mode="off"), - max_res=19_912, - log2_hashmap_size=22, - far_plane=100.0, - ), - ), - optimizers={ - "proposal_networks": { - "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), - "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), - }, - "fields": { - "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), - "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), - }, - "camera_opt": { - "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), - "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), - }, - }, - viewer=ViewerConfig(num_rays_per_chunk=1 << 15), - vis="viewer+tensorboard", -) - method_configs["depth-nerfacto"] = TrainerConfig( method_name="depth-nerfacto", steps_per_eval_batch=500, @@ -337,7 +297,8 @@ viewer=ViewerConfig(num_rays_per_chunk=1 << 12), vis="viewer", ) - +# +# method_configs["mipnerf"] = TrainerConfig( method_name="mipnerf", pipeline=VanillaPipelineConfig( From e24ffe0718f248f9c5024b28c94d4093818e7ee2 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 15:59:00 -0500 Subject: [PATCH 25/38] Also add 1k exrs --- nerfstudio/scripts/downloads/eyeful_tower.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py index df49498cd9..3bab323cc6 100644 --- a/nerfstudio/scripts/downloads/eyeful_tower.py +++ b/nerfstudio/scripts/downloads/eyeful_tower.py @@ -66,6 +66,7 @@ class EyefulTowerResolutionMetadata: "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"), "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"), "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), + "exr_1k": EyefulTowerResolutionMetadata("images-1k", 684, 1024, "exr"), "exr_2k": 
EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"), } From a022a578db634dd675938c812ca701cd47260c3c Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 18:29:17 -0800 Subject: [PATCH 26/38] Add option to modify bg color in gaussian splatting --- .../data/dataparsers/blender_dataparser.py | 2 -- nerfstudio/model_components/renderers.py | 6 ----- nerfstudio/models/gaussian_splatting.py | 23 ++++++++++++++----- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/nerfstudio/data/dataparsers/blender_dataparser.py b/nerfstudio/data/dataparsers/blender_dataparser.py index d8172579c7..b7c90c907e 100644 --- a/nerfstudio/data/dataparsers/blender_dataparser.py +++ b/nerfstudio/data/dataparsers/blender_dataparser.py @@ -32,7 +32,6 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.utils.colors import get_color from nerfstudio.utils.io import load_from_json -from nerfstudio.model_components.renderers import force_background_color_override @dataclass @@ -64,7 +63,6 @@ def __init__(self, config: BlenderDataParserConfig): self.alpha_color = config.alpha_color if self.alpha_color is not None: self.alpha_color_tensor = get_color(self.alpha_color) - force_background_color_override(self.alpha_color_tensor) else: self.alpha_color_tensor = None diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index bbcca06624..1fde0d693c 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -55,12 +55,6 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non BACKGROUND_COLOR_OVERRIDE = old_background_color -def force_background_color_override(mode: Float[Tensor, "3"]) -> None: - """Force background color override.""" - global BACKGROUND_COLOR_OVERRIDE - BACKGROUND_COLOR_OVERRIDE = mode - - class RGBRenderer(nn.Module): """Standard volumetric rendering. 
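With force_background_color_override removed, background overrides flow only through the
background_color_override_context context manager that remains in renderers.py, which the
Gaussian splatting model below consults via renderers.BACKGROUND_COLOR_OVERRIDE at eval
time. A minimal usage sketch, assuming a trained model and camera are in scope (the
get_outputs call is illustrative, not part of this patch):

    import torch
    from nerfstudio.model_components import renderers

    override = torch.ones(3)  # render on a white background
    with renderers.background_color_override_context(override):
        # BACKGROUND_COLOR_OVERRIDE is set inside this block and restored on exit
        outputs = model.get_outputs(camera)  # hypothetical call for illustration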
diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index 70f20706d8..081880ee2c 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -22,6 +22,7 @@
 import math
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Tuple, Type, Union
+from typing_extensions import Literal
 
 import numpy as np
 import torch
@@ -46,6 +47,7 @@
 # need following import for background color override
 from nerfstudio.model_components import renderers
 from nerfstudio.models.base_model import Model, ModelConfig
+from nerfstudio.utils.colors import get_color
 from nerfstudio.utils.rich_utils import CONSOLE
 
@@ -111,10 +113,12 @@ class GaussianSplattingModelConfig(ModelConfig):
     _target: Type = field(default_factory=lambda: GaussianSplattingModel)
     warmup_length: int = 500
     """period of steps where refinement is turned off"""
-    refine_every: int = 100
+    refine_every: int = 150
     """period of steps where gaussians are culled and densified"""
     resolution_schedule: int = 250
     """training starts at 1/d resolution, every n steps this is doubled"""
+    background_color: Literal["random", "black", "white"] = "random"
+    """Whether to randomize the background color."""
     num_downscales: int = 0
     """at the beginning, resolution is 1/2^d, where d is this number"""
     cull_alpha_thresh: float = 0.1
@@ -211,7 +215,10 @@ def populate_modules(self):
         self.step = 0
 
         self.crop_box: Optional[OrientedBox] = None
-        self.back_color = torch.zeros(3)
+        if self.config.background_color == "random":
+            self.back_color = torch.rand(3)
+        else:
+            self.back_color = get_color(self.config.background_color)
 
         self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup(
             num_cameras=self.num_train_data, device="cpu"
@@ -596,13 +603,17 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]:
         # currently relies on the branch vickie/camera-grads
         self.camera_optimizer.apply_to_camera(camera)
         # get the background color
-        if renderers.BACKGROUND_COLOR_OVERRIDE is not None:
-            background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device)
-        else:
-            if self.training:
+
+        if self.training:
+            if self.config.background_color == "random":
                 background = torch.rand(3, device=self.device)
             else:
                 background = self.back_color.to(self.device)
+        else:
+            if renderers.BACKGROUND_COLOR_OVERRIDE is not None:
+                background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device)
+            else:
+                background = self.back_color.to(self.device)
 
         if self.crop_box is not None and not self.training:
             crop_ids = self.crop_box.within(self.means).squeeze()

From 999e63c664019ff6d70f2fa637386a726d2158bf Mon Sep 17 00:00:00 2001
From: xucr
Date: Thu, 18 Jan 2024 18:43:44 -0800
Subject: [PATCH 27/38] fix back the config, bg color should work now

---
 nerfstudio/models/gaussian_splatting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index f7280d1831..f8a02dae58 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -107,7 +107,7 @@ class GaussianSplattingModelConfig(ModelConfig):
     _target: Type = field(default_factory=lambda: GaussianSplattingModel)
     warmup_length: int = 500
     """period of steps where refinement is turned off"""
-    refine_every: int = 150
+    refine_every: int = 100
     """period of steps where gaussians are culled and densified"""
     resolution_schedule: int = 250
     """training starts at 1/d resolution, every n steps this is doubled"""

From bae5a5f47df89076de7c5641c3410936df1ffd08 Mon Sep 17 00:00:00 2001
From: xucr
Date: Thu, 18 Jan 2024 18:51:25 -0800
Subject: [PATCH 28/38] removed camera optimizer for gs to align with main

---
 nerfstudio/models/gaussian_splatting.py | 266 ++++++++++++++++++------
 1 file changed, 208 insertions(+), 58 deletions(-)

diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index f8a02dae58..8092db00a0 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -35,7 +35,11 @@
 from nerfstudio.cameras.cameras import Cameras
 from nerfstudio.data.scene_box import OrientedBox
-from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation
+from nerfstudio.engine.callbacks import (
+    TrainingCallback,
+    TrainingCallbackAttributes,
+    TrainingCallbackLocation,
+)
 from nerfstudio.engine.optimizers import Optimizers
 
 # need following import for background color override
@@ -79,7 +83,9 @@ def SH2RGB(sh):
     return sh * C0 + 0.5
 
 
-def projection_matrix(znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"):
+def projection_matrix(
+    znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"
+):
     """
     Constructs an OpenGL-style perspective projection matrix.
     """
@@ -111,7 +117,7 @@ class GaussianSplattingModelConfig(ModelConfig):
     """period of steps where gaussians are culled and densified"""
     resolution_schedule: int = 250
     """training starts at 1/d resolution, every n steps this is doubled"""
-    background_color: Literal["random", "black", "white"] = "random"
+    background_color: Literal["random", "black", "white"] = "white"
     """Whether to randomize the background color."""
     num_downscales: int = 0
     """at the beginning, resolution is 1/2^d, where d is this number"""
@@ -196,9 +202,13 @@ def populate_modules(self):
             self.features_rest = torch.nn.Parameter(shs[:, 1:, :])
         else:
             self.features_dc = torch.nn.Parameter(torch.rand(self.num_points, 3))
-            self.features_rest = torch.nn.Parameter(torch.zeros((self.num_points, dim_sh - 1, 3)))
+            self.features_rest = torch.nn.Parameter(
+                torch.zeros((self.num_points, dim_sh - 1, 3))
+            )
 
-        self.opacities = torch.nn.Parameter(torch.logit(0.1 * torch.ones(self.num_points, 1)))
+        self.opacities = torch.nn.Parameter(
+            torch.logit(0.1 * torch.ones(self.num_points, 1))
+        )
 
         # metrics
         from torchmetrics.image import PeakSignalNoiseRatio
@@ -240,7 +250,9 @@ def load_state_dict(self, dict, **kwargs):  # type: ignore
         self.opacities = torch.nn.Parameter(torch.zeros(newp, 1, device=self.device))
         self.features_dc = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device))
         self.features_rest = torch.nn.Parameter(
-            torch.zeros(newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device)
+            torch.zeros(
+                newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device
+            )
         )
         super().load_state_dict(dict, **kwargs)
 
@@ -256,7 +268,9 @@ def k_nearest_sklearn(self, x: torch.Tensor, k: int):
         # Build the nearest neighbors model
         from sklearn.neighbors import NearestNeighbors
 
-        nn_model = NearestNeighbors(n_neighbors=k + 1, algorithm="auto", metric="euclidean").fit(x_np)
+        nn_model = NearestNeighbors(
+            n_neighbors=k + 1, algorithm="auto", metric="euclidean"
+        ).fit(x_np)
 
         # Find the k-nearest neighbors
         distances, indices = nn_model.kneighbors(x_np)
@@ -295,13 +309,20 @@ def dup_in_optim(self, optimizer, dup_mask, new_params, n=2):
         param_state = optimizer.state[param]
         repeat_dims = (n,) + tuple(1 for _ in range(param_state["exp_avg"].dim() - 1))
         param_state["exp_avg"] = torch.cat(
-            [param_state["exp_avg"], torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims)],
+            [
+                param_state["exp_avg"],
+                torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(
+                    *repeat_dims
+                ),
+            ],
             dim=0,
         )
         param_state["exp_avg_sq"] = torch.cat(
             [
                 param_state["exp_avg_sq"],
-                torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims),
+                torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(
+                    *repeat_dims
+                ),
             ],
             dim=0,
         )
@@ -332,14 +353,17 @@ def after_train(self, step: int):
             else:
                 assert self.vis_counts is not None
                 self.vis_counts[visible_mask] = self.vis_counts[visible_mask] + 1
-                self.xys_grad_norm[visible_mask] = grads[visible_mask] + self.xys_grad_norm[visible_mask]
+                self.xys_grad_norm[visible_mask] = (
+                    grads[visible_mask] + self.xys_grad_norm[visible_mask]
+                )
 
             # update the max screen size, as a ratio of number of pixels
             if self.max_2Dsize is None:
                 self.max_2Dsize = torch.zeros_like(self.radii, dtype=torch.float32)
             newradii = self.radii.detach()[visible_mask]
             self.max_2Dsize[visible_mask] = torch.maximum(
-                self.max_2Dsize[visible_mask], newradii / float(max(self.last_size[0], self.last_size[1]))
+                self.max_2Dsize[visible_mask],
+                newradii / float(max(self.last_size[0], self.last_size[1])),
             )
 
     def set_crop(self, crop_box: Optional[OrientedBox]):
@@ -361,16 +385,30 @@ def refinement_after(self, optimizers: Optimizers, step):
             reset_interval = self.config.reset_alpha_every * self.config.refine_every
             do_densification = (
                 self.step < self.config.stop_split_at
-                and self.step % reset_interval > self.num_train_data + self.config.refine_every
+                and self.step % reset_interval
+                > self.num_train_data + self.config.refine_every
             )
             if do_densification:
                 # then we densify
-                assert self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None
-                avg_grad_norm = (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1])
+                assert (
+                    self.xys_grad_norm is not None
+                    and self.vis_counts is not None
+                    and self.max_2Dsize is not None
+                )
+                avg_grad_norm = (
+                    (self.xys_grad_norm / self.vis_counts)
+                    * 0.5
+                    * max(self.last_size[0], self.last_size[1])
+                )
                 high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze()
-                splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze()
+                splits = (
+                    self.scales.exp().max(dim=-1).values
+                    > self.config.densify_size_thresh
+                ).squeeze()
                 if self.step < self.config.stop_screen_size_at:
-                    splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze()
+                    splits |= (
+                        self.max_2Dsize > self.config.split_screen_size
+                    ).squeeze()
                 splits &= high_grads
                 nsamps = self.config.n_split_samples
                 (
@@ -382,7 +420,10 @@ def refinement_after(self, optimizers: Optimizers, step):
                     split_quats,
                 ) = self.split_gaussians(splits, nsamps)
 
-                dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze()
+                dups = (
+                    self.scales.exp().max(dim=-1).values
+                    <= self.config.densify_size_thresh
+                ).squeeze()
                 dups &= high_grads
                 (
                     dup_means,
@@ -392,19 +433,43 @@ def refinement_after(self, optimizers: Optimizers, step):
                     dup_scales,
                     dup_quats,
                 ) = self.dup_gaussians(dups)
-                self.means = Parameter(torch.cat([self.means.detach(), split_means, dup_means], dim=0))
+                self.means = Parameter(
+                    torch.cat([self.means.detach(), split_means, dup_means], dim=0)
+                )
                 self.features_dc = Parameter(
-                    torch.cat([self.features_dc.detach(), split_features_dc, dup_features_dc], dim=0)
+                    torch.cat(
+                        [self.features_dc.detach(), split_features_dc, dup_features_dc],
+                        dim=0,
+                    )
                 )
                 self.features_rest = Parameter(
-                    torch.cat([self.features_rest.detach(), split_features_rest, dup_features_rest], dim=0)
+                    torch.cat(
+                        [
+                            self.features_rest.detach(),
+                            split_features_rest,
+                            dup_features_rest,
+                        ],
+                        dim=0,
+                    )
+                )
+                self.opacities = Parameter(
+                    torch.cat(
+                        [self.opacities.detach(), split_opacities, dup_opacities], dim=0
+                    )
+                )
+                self.scales = Parameter(
+                    torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0)
+                )
+                self.quats = Parameter(
+                    torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0)
                 )
-                self.opacities = Parameter(torch.cat([self.opacities.detach(), split_opacities, dup_opacities], dim=0))
-                self.scales = Parameter(torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0))
-                self.quats = Parameter(torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0))
                 # append zeros to the max_2Dsize tensor
                 self.max_2Dsize = torch.cat(
-                    [self.max_2Dsize, torch.zeros_like(split_scales[:, 0]), torch.zeros_like(dup_scales[:, 0])],
+                    [
+                        self.max_2Dsize,
+                        torch.zeros_like(split_scales[:, 0]),
+                        torch.zeros_like(dup_scales[:, 0]),
+                    ],
                     dim=0,
                 )
@@ -416,11 +481,21 @@ def refinement_after(self, optimizers: Optimizers, step):
                 # After a gaussian is split into two new gaussians, the original one should also be pruned.
                 splits_mask = torch.cat(
-                    (splits, torch.zeros(nsamps * splits.sum() + dups.sum(), device=self.device, dtype=torch.bool))
+                    (
+                        splits,
+                        torch.zeros(
+                            nsamps * splits.sum() + dups.sum(),
+                            device=self.device,
+                            dtype=torch.bool,
+                        ),
+                    )
                 )
 
                 deleted_mask = self.cull_gaussians(splits_mask)
-            elif self.step >= self.config.stop_split_at and self.config.continue_cull_post_densification:
+            elif (
+                self.step >= self.config.stop_split_at
+                and self.config.continue_cull_post_densification
+            ):
                 deleted_mask = self.cull_gaussians()
             else:
                 # if we do not allow culling post refinement, no more gaussians will be pruned.
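The hunk that follows reformats the periodic alpha reset. A small sketch of that
arithmetic, assuming the cull_alpha_thresh default of 0.1 shown earlier in this file:

    import torch

    cull_alpha_thresh = 0.1  # config default from this diff
    reset_value = cull_alpha_thresh * 2.0  # reset to twice the cull threshold
    cap = torch.logit(torch.tensor(reset_value))  # logit(0.2) = ln(0.25), about -1.386
    # Every reset interval, opacities.data is clamped to at most `cap`, so each
    # gaussian drops to <= 20% alpha and must regain opacity to survive future culls.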
@@ -429,11 +504,17 @@ def refinement_after(self, optimizers: Optimizers, step): if deleted_mask is not None: self.remove_from_all_optim(optimizers, deleted_mask) - if self.step < self.config.stop_split_at and self.step % reset_interval == self.config.refine_every: + if ( + self.step < self.config.stop_split_at + and self.step % reset_interval == self.config.refine_every + ): # Reset value is set to be twice of the cull_alpha_thresh reset_value = self.config.cull_alpha_thresh * 2.0 self.opacities.data = torch.clamp( - self.opacities.data, max=torch.logit(torch.tensor(reset_value, device=self.device)).item() + self.opacities.data, + max=torch.logit( + torch.tensor(reset_value, device=self.device) + ).item(), ) # reset the exp of optimizer optim = optimizers.optimizers["opacity"] @@ -453,18 +534,25 @@ def cull_gaussians(self, extra_cull_mask: Optional[torch.Tensor] = None): """ n_bef = self.num_points # cull transparent ones - culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze() + culls = ( + torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh + ).squeeze() below_alpha_count = torch.sum(culls).item() toobigs_count = 0 if extra_cull_mask is not None: culls = culls | extra_cull_mask if self.step > self.config.refine_every * self.config.reset_alpha_every: # cull huge ones - toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze() + toobigs = ( + torch.exp(self.scales).max(dim=-1).values + > self.config.cull_scale_thresh + ).squeeze() if self.step < self.config.stop_screen_size_at: # cull big screen space assert self.max_2Dsize is not None - toobigs = toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() + toobigs = ( + toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() + ) culls = culls | toobigs toobigs_count = torch.sum(toobigs).item() self.means = Parameter(self.means[~culls].detach()) @@ -487,12 +575,18 @@ def split_gaussians(self, split_mask, samps): """ n_splits = split_mask.sum().item() - CONSOLE.log(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}") - centered_samples = torch.randn((samps * n_splits, 3), device=self.device) # Nx3 of axis-aligned scales + CONSOLE.log( + f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}" + ) + centered_samples = torch.randn( + (samps * n_splits, 3), device=self.device + ) # Nx3 of axis-aligned scales scaled_samples = ( torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples ) # how these scales are rotated - quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True) # normalize them first + quats = self.quats[split_mask] / self.quats[split_mask].norm( + dim=-1, keepdim=True + ) # normalize them first rots = quat_to_rotmat(quats.repeat(samps, 1)) # how these scales are rotated rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze() new_means = rotated_samples + self.means[split_mask].repeat(samps, 1) @@ -503,25 +597,45 @@ def split_gaussians(self, split_mask, samps): new_opacities = self.opacities[split_mask].repeat(samps, 1) # step 4, sample new scales size_fac = 1.6 - new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1) - self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac) + new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat( + samps, 1 + ) + self.scales[split_mask] = torch.log( + torch.exp(self.scales[split_mask]) / 
size_fac + ) # step 5, sample new quats new_quats = self.quats[split_mask].repeat(samps, 1) - return new_means, new_features_dc, new_features_rest, new_opacities, new_scales, new_quats + return ( + new_means, + new_features_dc, + new_features_rest, + new_opacities, + new_scales, + new_quats, + ) def dup_gaussians(self, dup_mask): """ This function duplicates gaussians that are too small """ n_dups = dup_mask.sum().item() - CONSOLE.log(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}") + CONSOLE.log( + f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}" + ) dup_means = self.means[dup_mask] dup_features_dc = self.features_dc[dup_mask] dup_features_rest = self.features_rest[dup_mask] dup_opacities = self.opacities[dup_mask] dup_scales = self.scales[dup_mask] dup_quats = self.quats[dup_mask] - return dup_means, dup_features_dc, dup_features_rest, dup_opacities, dup_scales, dup_quats + return ( + dup_means, + dup_features_dc, + dup_features_rest, + dup_opacities, + dup_scales, + dup_quats, + ) @property def num_points(self): @@ -531,7 +645,11 @@ def get_training_callbacks( self, training_callback_attributes: TrainingCallbackAttributes ) -> List[TrainingCallback]: cbs = [] - cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb)) + cbs.append( + TrainingCallback( + [TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb + ) + ) # The order of these matters cbs.append( TrainingCallback( @@ -573,7 +691,13 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: def _get_downscale_factor(self): if self.training: - return 2 ** max((self.config.num_downscales - self.step // self.config.resolution_schedule), 0) + return 2 ** max( + ( + self.config.num_downscales + - self.step // self.config.resolution_schedule + ), + 0, + ) else: return 1 @@ -591,10 +715,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: print("Called get_outputs with not a camera") return {} assert camera.shape[0] == 1, "Only one camera at a time" - if self.training: - # currently relies on the branch vickie/camera-grads - self.camera_optimizer.apply_to_camera(camera) - + # get the background color if self.training: if self.config.background_color == "random": @@ -610,7 +731,11 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: - return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} + return { + "rgb": background.repeat( + int(camera.height.item()), int(camera.width.item()), 1 + ) + } else: crop_ids = None camera_downscale = self._get_downscale_factor() @@ -619,7 +744,9 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: R = camera.camera_to_worlds[0, :3, :3] # 3 x 3 T = camera.camera_to_worlds[0, :3, 3:4] # 3 x 1 # flip the z and y axes to align with gsplat conventions - R_edit = torch.diag(torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype)) + R_edit = torch.diag( + torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype) + ) R = R @ R_edit # analytic matrix inverse to get world2camera matrix R_inv = R.T @@ -657,7 +784,9 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: scales_crop = self.scales quats_crop = self.quats - colors_crop = torch.cat((features_dc_crop[:, None, :], features_rest_crop), 
dim=1) + colors_crop = torch.cat( + (features_dc_crop[:, None, :], features_rest_crop), dim=1 + ) self.xys, depths, self.radii, conics, num_tiles_hit, cov3d = project_gaussians( # type: ignore means_crop, @@ -675,14 +804,20 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: tile_bounds, ) # type: ignore if (self.radii).sum() == 0: - return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} + return { + "rgb": background.repeat( + int(camera.height.item()), int(camera.width.item()), 1 + ) + } # Important to allow xys grads to populate properly if self.training: self.xys.retain_grad() if self.config.sh_degree > 0: - viewdirs = means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] # (N, 3) + viewdirs = ( + means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] + ) # (N, 3) viewdirs = viewdirs / viewdirs.norm(dim=-1, keepdim=True) n = min(self.step // self.config.sh_degree_interval, self.config.sh_degree) rgbs = spherical_harmonics(n, viewdirs, colors_crop) @@ -721,7 +856,9 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: H, W, background=torch.ones(3, device=self.device) * 10, - )[..., 0:1] # type: ignore + )[ + ..., 0:1 + ] # type: ignore return {"rgb": rgb, "depth": depth_im} # type: ignore @@ -740,7 +877,9 @@ def get_gt_img(self, image: torch.Tensor): # torchvision can be slow to import, so we do it lazily. import torchvision.transforms.functional as TF - gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) + gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute( + 1, 2, 0 + ) else: gt_img = image return gt_img.to(self.device) @@ -760,7 +899,9 @@ def get_metrics_dict(self, outputs, batch) -> Dict[str, torch.Tensor]: metrics_dict["gaussian_count"] = self.num_points return metrics_dict - def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Tensor]: + def get_loss_dict( + self, outputs, batch, metrics_dict=None + ) -> Dict[str, torch.Tensor]: """Computes and returns the losses dict. 
Args:
@@ -770,12 +911,16 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te
         """
         gt_img = self.get_gt_img(batch["image"])
         Ll1 = torch.abs(gt_img - outputs["rgb"]).mean()
-        simloss = 1 - self.ssim(gt_img.permute(2, 0, 1)[None, ...], outputs["rgb"].permute(2, 0, 1)[None, ...])
+        simloss = 1 - self.ssim(
+            gt_img.permute(2, 0, 1)[None, ...],
+            outputs["rgb"].permute(2, 0, 1)[None, ...],
+        )
         if self.config.use_scale_regularization and self.step % 10 == 0:
             scale_exp = torch.exp(self.scales)
             scale_reg = (
                 torch.maximum(
-                    scale_exp.amax(dim=-1) / scale_exp.amin(dim=-1), torch.tensor(self.config.max_gauss_ratio)
+                    scale_exp.amax(dim=-1) / scale_exp.amin(dim=-1),
+                    torch.tensor(self.config.max_gauss_ratio),
                 )
                 - self.config.max_gauss_ratio
             )
@@ -784,12 +929,15 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te
             scale_reg = torch.tensor(0.0).to(self.device)

         return {
-            "main_loss": (1 - self.config.ssim_lambda) * Ll1 + self.config.ssim_lambda * simloss,
+            "main_loss": (1 - self.config.ssim_lambda) * Ll1
+            + self.config.ssim_lambda * simloss,
             "scale_reg": scale_reg,
         }

     @torch.no_grad()
-    def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]:
+    def get_outputs_for_camera(
+        self, camera: Cameras, obb_box: Optional[OrientedBox] = None
+    ) -> Dict[str, torch.Tensor]:
         """Takes in a camera, generates the raybundle, and computes the output of the model.

         Overridden for a camera-based gaussian model.

@@ -822,7 +970,9 @@ def get_image_metrics_and_images(
             import torchvision.transforms.functional as TF

             newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d]
-            predicted_rgb = TF.resize(outputs["rgb"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0)
+            predicted_rgb = TF.resize(
+                outputs["rgb"].permute(2, 0, 1), newsize, antialias=None
+            ).permute(1, 2, 0)
         else:
             predicted_rgb = outputs["rgb"]

From 44581bf75ec084fb3d4e913acc99eb368e40ddc4 Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Thu, 18 Jan 2024 23:44:44 -0500
Subject: [PATCH 29/38] Address feedback

---
 nerfstudio/process_data/process_data_utils.py |   8 +-
 nerfstudio/scripts/downloads/eyeful_tower.py  | 103 ++++++++++++++----
 2 files changed, 89 insertions(+), 22 deletions(-)

diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index 13682f90b7..91132fe4ac 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -58,16 +58,18 @@ class CameraModel(Enum):
 }


-def list_images(data: Path) -> List[Path]:
+def list_images(data: Path, recursive: bool = False) -> List[Path]:
     """Lists all supported images in a directory

     Args:
-        data: Path to the directory of images. Nested folders are searched as well.
+        data: Path to the directory of images.
+        recursive: Whether to search nested folders in `data`.
Returns: Paths to images contained in the directory """ allowed_exts = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + ALLOWED_RAW_EXTS - image_paths = sorted([p for p in data.glob("**/[!.]*") if p.suffix.lower() in allowed_exts]) + glob = "**/[!.]*" if recursive else "[!.]*" + image_paths = sorted([p for p in data.glob(glob) if p.suffix.lower() in allowed_exts]) return image_paths diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py index 3bab323cc6..23fe080e68 100644 --- a/nerfstudio/scripts/downloads/eyeful_tower.py +++ b/nerfstudio/scripts/downloads/eyeful_tower.py @@ -26,6 +26,7 @@ import tyro from nerfstudio.scripts.downloads.utils import DatasetDownload +from nerfstudio.utils.rich_utils import CONSOLE eyefultower_downloads = [ "all", @@ -90,7 +91,23 @@ class EyefulTowerDownload(DatasetDownload): resolution_name: Tuple[EyefulTowerResolution, ...] = () @staticmethod - def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int): + def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int) -> ET.ElementTree: + """Rescales parameters in metashape's cameras.xml format to match target width/height. + + The EyefulTower dataset provides images which have already been rescaled to smaller sizes from the original ~8K + resolution. However, the cameras.xml file provided, which contains the camera intrinsics in metashape's format, + only contains valid parameters for the original resolution. This function generates a new set of parameters + corresponding to a smaller resolution dataset by scaling the original values from cameras.xml. Non-uniform + scaling (different in X and Y) can be performed due to slight rounding differences. + + Args: + xml_tree: XML tree loaded from Metashape's cameras.xml file + target_width: Width of output images + target_height: Height of output images + + Returns: + Updated XML tree with scaled intrinsics and width/height parameters + """ transformed = copy.deepcopy(xml_tree) root = transformed.getroot() @@ -142,9 +159,34 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe return transformed + @staticmethod def convert_cameras_to_nerfstudio_transforms( - self, capture_name: str, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str - ): + capture_name: str, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str + ) -> dict: + """Converts EyefulTower cameras.json format to Nerfstudio's transforms.json format + + The EyefulTower dataset provides a cameras.json file containing geometric calibration information for the + original resolution ~8K images, similar to the cameras.xml file from Metashape. The main advantage is that data + is provided for each individual image, rather than being structured hierarchically with rig constraints (as in + the Metashape cameras.xml). + + This function takes the cameras.json file and converts it to the transforms.json Nerfstudio expects, with the + necessary scaling of intrinsics parameters applied. This function also handles the EyefulTower splits.json file, + describing the breakdown of training and validation images, and adds the appropriate fields to transforms.json. + This function works for both fisheye (V1) and pinhole (V2) cameras. Scene-specific fisheye mask radii are added + to the returned dictionary if needed. 
+
+        Args:
+            capture_name: Which specific EyefulTower capture is being converted
+            cameras: Data loaded from EyefulTower cameras.json
+            splits: Data loaded from EyefulTower splits.json
+            target_width: Width of output images
+            target_height: Height of output images
+            extension: Extension of output images
+
+        Returns:
+            Dict in the Nerfstudio transforms.json format, with scaled camera parameters, splits, and optional metadata.
+        """
         output = {}

         distortion_models = [c["distortionModel"] for c in cameras["KRT"]]
@@ -221,7 +263,17 @@ def convert_cameras_to_nerfstudio_transforms(
         output["val_filenames"] = split_filenames["test"]
         return output

-    def subsample_nerfstudio_transforms(self, transforms: dict, n: int):
+    @staticmethod
+    def subsample_nerfstudio_transforms(transforms: dict, n: int):
+        """Uniformly samples n frames from a Nerfstudio transforms.json dict.
+
+        Args:
+            transforms: Dictionary in Nerfstudio transforms.json format
+            n: Number of frames to uniformly subsample
+
+        Returns:
+            New transforms.json dict with n frames. All other parameters are copied.
+        """
         target = min(len(transforms["frames"]), n)
         indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int)
@@ -239,17 +291,29 @@ def subsample_nerfstudio_transforms(self, transforms: dict, n: int):

         return output

-    def download(self, save_dir: Path):
+    def download(self, save_dir: Path) -> None:
+        """Entrypoint to download the EyefulTower dataset.
+
+        * Fetches the specified dataset(s) at the specified resolution(s) from the EyefulTower AWS S3 bucket. Redundant
+          data is not downloaded, so this function can safely (and performantly) be called multiple times with
+          increasing scope of datasets and resolutions.
+        * Generates updated Metashape cameras.xml for lower resolution downloads.
+        * Generates Nerfstudio transforms.json for each resolution. Additionally generates transforms_300.json and
+          transforms_half.json containing subsets (300 frames, half the frames) of the full set to help with iteration.
+
+        Args:
+            save_dir: Directory to save dataset. Output will be in save_dir/eyefultower/
+        """
         if len(self.capture_name) == 0:
             self.capture_name = ("riverview",)
-            print(
+            CONSOLE.print(
                 f"No capture specified, using {self.capture_name} by default.",
                 "Add `--help` to this command to see all available captures.",
             )

         if len(self.resolution_name) == 0:
             self.resolution_name = ("jpeg_2k",)
-            print(
+            CONSOLE.print(
                 f"No resolution specified, using {self.resolution_name} by default.",
                 "Add `--help` to this command to see all available resolutions.",
             )
@@ -262,7 +326,7 @@ def download(self, save_dir: Path):
                 captures.add(capture)
         captures = sorted(captures)
         if len(captures) == 0:
-            print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.")
+            CONSOLE.print("[bold yellow]WARNING: No EyefulTower captures specified. Nothing will be downloaded.")

         resolutions = set()
         for resolution in self.resolution_name:
@@ -272,7 +336,7 @@ def download(self, save_dir: Path):
                 resolutions.add(resolution)
         resolutions = sorted(resolutions)
         if len(resolutions) == 0:
-            print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.")
+            CONSOLE.print("[bold yellow]WARNING: No EyefulTower resolutions specified. 
Nothing will be downloaded.") driver = awscli.clidriver.create_clidriver() @@ -287,9 +351,9 @@ def download(self, save_dir: Path): + includes + [base_url, str(output_path)] ) - print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'") + CONSOLE.print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'") print( - f" Downloading resolutions {resolutions}", + f"\tDownloading resolutions {resolutions}", f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...", end=" ", flush=True, @@ -297,12 +361,13 @@ def download(self, save_dir: Path): driver.main(command) print("done!") - # After downloading, we'll insert an appropriate cameras.xml file into each directory - # It's quick enough that we can just redo it every time this is called, regardless - # of whether new data is downloaded. + # After downloading, we'll insert an appropriate cameras.xml file into each directory. It's quick enough + # that we can just redo it every time this is called, regardless of whether new data is downloaded. xml_input_path = output_path / "cameras.xml" if not xml_input_path.exists: - print(" WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.") + CONSOLE.print( + "\t[bold yellow]WARNING: cameras.xml not found. Scaled cameras.xml will not be generated." + ) else: tree = ET.parse(output_path / "cameras.xml") @@ -310,7 +375,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] xml_output_path = output_path / metadata.folder_name / "cameras.xml" print( - f" Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ", + f"\tGenerating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ", end=" ", flush=True, ) @@ -321,9 +386,9 @@ def download(self, save_dir: Path): json_input_path = output_path / "cameras.json" splits_input_path = output_path / "splits.json" if not json_input_path.exists: - print(" WARNING: cameras.json not found. transforms.json will not be generated.") + CONSOLE.print("\t[bold yellow]WARNING: cameras.json not found. transforms.json will not be generated.") elif not splits_input_path.exists: - print(" WARNING: splits.json not found. transforms.json will not be generated.") + CONSOLE.print("\t[bold yellow]WARNING: splits.json not found. transforms.json will not be generated.") else: with open(json_input_path, "r") as f: cameras = json.load(f) @@ -335,7 +400,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] json_output_path = output_path / metadata.folder_name / "transforms.json" print( - f" Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... ", + f"\tGenerating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... 
", end=" ", flush=True, ) From 544cb81dd21648ff109f69b204e79d13dc7bf8ec Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 23:45:29 -0500 Subject: [PATCH 30/38] Revert changes to pyproject.toml, to be added in a later PR --- pyproject.toml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2b8f6d8059..2071141da6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ classifiers = [ "Programming Language :: Python", ] dependencies = [ - "awscli>=1.31.10", "appdirs>=1.4", "av>=9.2.0", "comet_ml>=3.33.8", @@ -92,15 +91,12 @@ dev = [ "typeguard==2.13.3", "ruff==0.1.13", "sshconf==0.2.5", - # TODO(1480) enable when pycolmap windows wheels are available - # "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions + "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions "diffusers==0.16.1", "opencv-stubs==0.0.7", "transformers==4.29.2", "pyright==1.1.331", - # NOTE: Disabling projectaria-tools because it doesn't have prebuilt windows wheels - # Syntax comes from here: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ - "projectaria-tools>=1.3.1; sys_platform != 'win32'", + "projectaria_tools[all]>=1.2.0", ] # Documentation related packages From d1abe18e66eb465b90901ec94ca8bef26463af9a Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 23:53:47 -0500 Subject: [PATCH 31/38] Oops, probably shouldn't have gotten rid of awscli ... --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index bb68bd4eb6..c35ea04788 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ classifiers = [ dependencies = [ "appdirs>=1.4", "av>=9.2.0", + "awscli>=1.31.10", "comet_ml>=3.33.8", "cryptography>=38", "tyro>=0.6.6", From be6792fa04c6c16c0d8c5e50b38ce61042949945 Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 21:08:09 -0800 Subject: [PATCH 32/38] adding support for bg color, tested and reformatted now --- nerfstudio/models/gaussian_splatting.py | 199 ++++++------------------ 1 file changed, 46 insertions(+), 153 deletions(-) diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index 8092db00a0..ae3b544225 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -22,7 +22,6 @@ import math from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple, Type, Union -from typing_extensions import Literal import numpy as np import torch @@ -32,14 +31,11 @@ from gsplat.sh import num_sh_bases, spherical_harmonics from pytorch_msssim import SSIM from torch.nn import Parameter +from typing_extensions import Literal from nerfstudio.cameras.cameras import Cameras from nerfstudio.data.scene_box import OrientedBox -from nerfstudio.engine.callbacks import ( - TrainingCallback, - TrainingCallbackAttributes, - TrainingCallbackLocation, -) +from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation from nerfstudio.engine.optimizers import Optimizers # need following import for background color override @@ -83,9 +79,7 @@ def SH2RGB(sh): return sh * C0 + 0.5 -def projection_matrix( - znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu" -): +def projection_matrix(znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"): """ Constructs an OpenGL-style perspective projection matrix. 
""" @@ -117,7 +111,7 @@ class GaussianSplattingModelConfig(ModelConfig): """period of steps where gaussians are culled and densified""" resolution_schedule: int = 250 """training starts at 1/d resolution, every n steps this is doubled""" - background_color: Literal["random", "black", "white"] = "white" + background_color: Literal["random", "black", "white"] = "random" """Whether to randomize the background color.""" num_downscales: int = 0 """at the beginning, resolution is 1/2^d, where d is this number""" @@ -202,13 +196,9 @@ def populate_modules(self): self.features_rest = torch.nn.Parameter(shs[:, 1:, :]) else: self.features_dc = torch.nn.Parameter(torch.rand(self.num_points, 3)) - self.features_rest = torch.nn.Parameter( - torch.zeros((self.num_points, dim_sh - 1, 3)) - ) + self.features_rest = torch.nn.Parameter(torch.zeros((self.num_points, dim_sh - 1, 3))) - self.opacities = torch.nn.Parameter( - torch.logit(0.1 * torch.ones(self.num_points, 1)) - ) + self.opacities = torch.nn.Parameter(torch.logit(0.1 * torch.ones(self.num_points, 1))) # metrics from torchmetrics.image import PeakSignalNoiseRatio @@ -250,9 +240,7 @@ def load_state_dict(self, dict, **kwargs): # type: ignore self.opacities = torch.nn.Parameter(torch.zeros(newp, 1, device=self.device)) self.features_dc = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device)) self.features_rest = torch.nn.Parameter( - torch.zeros( - newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device - ) + torch.zeros(newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device) ) super().load_state_dict(dict, **kwargs) @@ -268,9 +256,7 @@ def k_nearest_sklearn(self, x: torch.Tensor, k: int): # Build the nearest neighbors model from sklearn.neighbors import NearestNeighbors - nn_model = NearestNeighbors( - n_neighbors=k + 1, algorithm="auto", metric="euclidean" - ).fit(x_np) + nn_model = NearestNeighbors(n_neighbors=k + 1, algorithm="auto", metric="euclidean").fit(x_np) # Find the k-nearest neighbors distances, indices = nn_model.kneighbors(x_np) @@ -311,18 +297,14 @@ def dup_in_optim(self, optimizer, dup_mask, new_params, n=2): param_state["exp_avg"] = torch.cat( [ param_state["exp_avg"], - torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat( - *repeat_dims - ), + torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims), ], dim=0, ) param_state["exp_avg_sq"] = torch.cat( [ param_state["exp_avg_sq"], - torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat( - *repeat_dims - ), + torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims), ], dim=0, ) @@ -353,9 +335,7 @@ def after_train(self, step: int): else: assert self.vis_counts is not None self.vis_counts[visible_mask] = self.vis_counts[visible_mask] + 1 - self.xys_grad_norm[visible_mask] = ( - grads[visible_mask] + self.xys_grad_norm[visible_mask] - ) + self.xys_grad_norm[visible_mask] = grads[visible_mask] + self.xys_grad_norm[visible_mask] # update the max screen size, as a ratio of number of pixels if self.max_2Dsize is None: @@ -385,30 +365,16 @@ def refinement_after(self, optimizers: Optimizers, step): reset_interval = self.config.reset_alpha_every * self.config.refine_every do_densification = ( self.step < self.config.stop_split_at - and self.step % reset_interval - > self.num_train_data + self.config.refine_every + and self.step % reset_interval > self.num_train_data + self.config.refine_every ) if do_densification: # then we densify - assert ( - self.xys_grad_norm is not None - 
and self.vis_counts is not None - and self.max_2Dsize is not None - ) - avg_grad_norm = ( - (self.xys_grad_norm / self.vis_counts) - * 0.5 - * max(self.last_size[0], self.last_size[1]) - ) + assert self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None + avg_grad_norm = (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1]) high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze() - splits = ( - self.scales.exp().max(dim=-1).values - > self.config.densify_size_thresh - ).squeeze() + splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze() if self.step < self.config.stop_screen_size_at: - splits |= ( - self.max_2Dsize > self.config.split_screen_size - ).squeeze() + splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze() splits &= high_grads nsamps = self.config.n_split_samples ( @@ -420,10 +386,7 @@ def refinement_after(self, optimizers: Optimizers, step): split_quats, ) = self.split_gaussians(splits, nsamps) - dups = ( - self.scales.exp().max(dim=-1).values - <= self.config.densify_size_thresh - ).squeeze() + dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze() dups &= high_grads ( dup_means, @@ -433,9 +396,7 @@ def refinement_after(self, optimizers: Optimizers, step): dup_scales, dup_quats, ) = self.dup_gaussians(dups) - self.means = Parameter( - torch.cat([self.means.detach(), split_means, dup_means], dim=0) - ) + self.means = Parameter(torch.cat([self.means.detach(), split_means, dup_means], dim=0)) self.features_dc = Parameter( torch.cat( [self.features_dc.detach(), split_features_dc, dup_features_dc], @@ -452,17 +413,9 @@ def refinement_after(self, optimizers: Optimizers, step): dim=0, ) ) - self.opacities = Parameter( - torch.cat( - [self.opacities.detach(), split_opacities, dup_opacities], dim=0 - ) - ) - self.scales = Parameter( - torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0) - ) - self.quats = Parameter( - torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0) - ) + self.opacities = Parameter(torch.cat([self.opacities.detach(), split_opacities, dup_opacities], dim=0)) + self.scales = Parameter(torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0)) + self.quats = Parameter(torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0)) # append zeros to the max_2Dsize tensor self.max_2Dsize = torch.cat( [ @@ -492,10 +445,7 @@ def refinement_after(self, optimizers: Optimizers, step): ) deleted_mask = self.cull_gaussians(splits_mask) - elif ( - self.step >= self.config.stop_split_at - and self.config.continue_cull_post_densification - ): + elif self.step >= self.config.stop_split_at and self.config.continue_cull_post_densification: deleted_mask = self.cull_gaussians() else: # if we donot allow culling post refinement, no more gaussians will be pruned. 
@@ -504,17 +454,12 @@ def refinement_after(self, optimizers: Optimizers, step): if deleted_mask is not None: self.remove_from_all_optim(optimizers, deleted_mask) - if ( - self.step < self.config.stop_split_at - and self.step % reset_interval == self.config.refine_every - ): + if self.step < self.config.stop_split_at and self.step % reset_interval == self.config.refine_every: # Reset value is set to be twice of the cull_alpha_thresh reset_value = self.config.cull_alpha_thresh * 2.0 self.opacities.data = torch.clamp( self.opacities.data, - max=torch.logit( - torch.tensor(reset_value, device=self.device) - ).item(), + max=torch.logit(torch.tensor(reset_value, device=self.device)).item(), ) # reset the exp of optimizer optim = optimizers.optimizers["opacity"] @@ -534,25 +479,18 @@ def cull_gaussians(self, extra_cull_mask: Optional[torch.Tensor] = None): """ n_bef = self.num_points # cull transparent ones - culls = ( - torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh - ).squeeze() + culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze() below_alpha_count = torch.sum(culls).item() toobigs_count = 0 if extra_cull_mask is not None: culls = culls | extra_cull_mask if self.step > self.config.refine_every * self.config.reset_alpha_every: # cull huge ones - toobigs = ( - torch.exp(self.scales).max(dim=-1).values - > self.config.cull_scale_thresh - ).squeeze() + toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze() if self.step < self.config.stop_screen_size_at: # cull big screen space assert self.max_2Dsize is not None - toobigs = ( - toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() - ) + toobigs = toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() culls = culls | toobigs toobigs_count = torch.sum(toobigs).item() self.means = Parameter(self.means[~culls].detach()) @@ -575,18 +513,12 @@ def split_gaussians(self, split_mask, samps): """ n_splits = split_mask.sum().item() - CONSOLE.log( - f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}" - ) - centered_samples = torch.randn( - (samps * n_splits, 3), device=self.device - ) # Nx3 of axis-aligned scales + CONSOLE.log(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}") + centered_samples = torch.randn((samps * n_splits, 3), device=self.device) # Nx3 of axis-aligned scales scaled_samples = ( torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples ) # how these scales are rotated - quats = self.quats[split_mask] / self.quats[split_mask].norm( - dim=-1, keepdim=True - ) # normalize them first + quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True) # normalize them first rots = quat_to_rotmat(quats.repeat(samps, 1)) # how these scales are rotated rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze() new_means = rotated_samples + self.means[split_mask].repeat(samps, 1) @@ -597,12 +529,8 @@ def split_gaussians(self, split_mask, samps): new_opacities = self.opacities[split_mask].repeat(samps, 1) # step 4, sample new scales size_fac = 1.6 - new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat( - samps, 1 - ) - self.scales[split_mask] = torch.log( - torch.exp(self.scales[split_mask]) / size_fac - ) + new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1) + self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac) # step 5, 
sample new quats new_quats = self.quats[split_mask].repeat(samps, 1) return ( @@ -619,9 +547,7 @@ def dup_gaussians(self, dup_mask): This function duplicates gaussians that are too small """ n_dups = dup_mask.sum().item() - CONSOLE.log( - f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}" - ) + CONSOLE.log(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}") dup_means = self.means[dup_mask] dup_features_dc = self.features_dc[dup_mask] dup_features_rest = self.features_rest[dup_mask] @@ -645,11 +571,7 @@ def get_training_callbacks( self, training_callback_attributes: TrainingCallbackAttributes ) -> List[TrainingCallback]: cbs = [] - cbs.append( - TrainingCallback( - [TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb - ) - ) + cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb)) # The order of these matters cbs.append( TrainingCallback( @@ -692,10 +614,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: def _get_downscale_factor(self): if self.training: return 2 ** max( - ( - self.config.num_downscales - - self.step // self.config.resolution_schedule - ), + (self.config.num_downscales - self.step // self.config.resolution_schedule), 0, ) else: @@ -731,11 +650,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: - return { - "rgb": background.repeat( - int(camera.height.item()), int(camera.width.item()), 1 - ) - } + return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} else: crop_ids = None camera_downscale = self._get_downscale_factor() @@ -744,9 +659,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: R = camera.camera_to_worlds[0, :3, :3] # 3 x 3 T = camera.camera_to_worlds[0, :3, 3:4] # 3 x 1 # flip the z and y axes to align with gsplat conventions - R_edit = torch.diag( - torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype) - ) + R_edit = torch.diag(torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype)) R = R @ R_edit # analytic matrix inverse to get world2camera matrix R_inv = R.T @@ -784,9 +697,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: scales_crop = self.scales quats_crop = self.quats - colors_crop = torch.cat( - (features_dc_crop[:, None, :], features_rest_crop), dim=1 - ) + colors_crop = torch.cat((features_dc_crop[:, None, :], features_rest_crop), dim=1) self.xys, depths, self.radii, conics, num_tiles_hit, cov3d = project_gaussians( # type: ignore means_crop, @@ -804,20 +715,14 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: tile_bounds, ) # type: ignore if (self.radii).sum() == 0: - return { - "rgb": background.repeat( - int(camera.height.item()), int(camera.width.item()), 1 - ) - } + return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} # Important to allow xys grads to populate properly if self.training: self.xys.retain_grad() if self.config.sh_degree > 0: - viewdirs = ( - means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] - ) # (N, 3) + viewdirs = means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] # (N, 3) viewdirs = viewdirs / viewdirs.norm(dim=-1, keepdim=True) n = min(self.step // self.config.sh_degree_interval, self.config.sh_degree) rgbs = 
spherical_harmonics(n, viewdirs, colors_crop) @@ -829,7 +734,6 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: camera.rescale_output_resolution(camera_downscale) assert (num_tiles_hit > 0).any() # type: ignore - rgb = rasterize_gaussians( # type: ignore self.xys, depths, @@ -856,9 +760,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: H, W, background=torch.ones(3, device=self.device) * 10, - )[ - ..., 0:1 - ] # type: ignore + )[..., 0:1] # type: ignore return {"rgb": rgb, "depth": depth_im} # type: ignore @@ -877,9 +779,7 @@ def get_gt_img(self, image: torch.Tensor): # torchvision can be slow to import, so we do it lazily. import torchvision.transforms.functional as TF - gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute( - 1, 2, 0 - ) + gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) else: gt_img = image return gt_img.to(self.device) @@ -899,9 +799,7 @@ def get_metrics_dict(self, outputs, batch) -> Dict[str, torch.Tensor]: metrics_dict["gaussian_count"] = self.num_points return metrics_dict - def get_loss_dict( - self, outputs, batch, metrics_dict=None - ) -> Dict[str, torch.Tensor]: + def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Tensor]: """Computes and returns the losses dict. Args: @@ -929,15 +827,12 @@ def get_loss_dict( scale_reg = torch.tensor(0.0).to(self.device) return { - "main_loss": (1 - self.config.ssim_lambda) * Ll1 - + self.config.ssim_lambda * simloss, + "main_loss": (1 - self.config.ssim_lambda) * Ll1 + self.config.ssim_lambda * simloss, "scale_reg": scale_reg, } @torch.no_grad() - def get_outputs_for_camera( - self, camera: Cameras, obb_box: Optional[OrientedBox] = None - ) -> Dict[str, torch.Tensor]: + def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]: """Takes in a camera, generates the raybundle, and computes the output of the model. Overridden for a camera-based gaussian model. 
@@ -970,9 +865,7 @@ def get_image_metrics_and_images( import torchvision.transforms.functional as TF newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d] - predicted_rgb = TF.resize( - outputs["rgb"].permute(2, 0, 1), newsize, antialias=None - ).permute(1, 2, 0) + predicted_rgb = TF.resize(outputs["rgb"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) else: predicted_rgb = outputs["rgb"] From 8afc6495eed7015b54c6cca8b88f304090ea2b59 Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 21:12:44 -0800 Subject: [PATCH 33/38] formatted --- .../datamanagers/full_images_datamanager.py | 82 ++++++++++++++++--- 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index f67bd120f4..7158d72eb6 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -26,7 +26,20 @@ from dataclasses import dataclass, field from functools import cached_property from pathlib import Path -from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin +from typing import ( + Dict, + ForwardRef, + Generic, + List, + Literal, + Optional, + Tuple, + Type, + Union, + cast, + get_args, + get_origin, +) import cv2 import numpy as np @@ -36,7 +49,11 @@ from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion -from nerfstudio.data.datamanagers.base_datamanager import DataManager, DataManagerConfig, TDataset +from nerfstudio.data.datamanagers.base_datamanager import ( + DataManager, + DataManagerConfig, + TDataset, +) from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset @@ -106,7 +123,10 @@ def __init__( self.train_dataset = self.create_train_dataset() self.eval_dataset = self.create_eval_dataset() if len(self.train_dataset) > 500 and self.config.cache_images == "gpu": - CONSOLE.print("Train dataset has over 500 images, overriding cache_images to cpu", style="bold yellow") + CONSOLE.print( + "Train dataset has over 500 images, overriding cache_images to cpu", + style="bold yellow", + ) self.config.cache_images = "cpu" self.cached_train, self.cached_eval = self.cache_images(self.config.cache_images) self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device @@ -174,16 +194,36 @@ def cache_images(self, cache_images_option): elif camera.camera_type.item() == CameraType.FISHEYE.value: distortion_params = np.array( - [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]] + [ + distortion_params[0], + distortion_params[1], + distortion_params[2], + distortion_params[3], + ] ) newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify( - K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0 + K, + distortion_params, + (image.shape[1], image.shape[0]), + np.eye(3), + balance=0, ) map1, map2 = cv2.fisheye.initUndistortRectifyMap( - K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1 + K, + distortion_params, + np.eye(3), + newK, + (image.shape[1], image.shape[0]), + cv2.CV_32FC1, ) # and then remap: - image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, 
borderMode=cv2.BORDER_CONSTANT) + image = cv2.remap( + image, + map1, + map2, + interpolation=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + ) if "mask" in data: mask = data["mask"].numpy() mask = mask.astype(np.uint8) * 255 @@ -248,16 +288,36 @@ def cache_images(self, cache_images_option): elif camera.camera_type.item() == CameraType.FISHEYE.value: distortion_params = np.array( - [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]] + [ + distortion_params[0], + distortion_params[1], + distortion_params[2], + distortion_params[3], + ] ) newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify( - K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0 + K, + distortion_params, + (image.shape[1], image.shape[0]), + np.eye(3), + balance=0, ) map1, map2 = cv2.fisheye.initUndistortRectifyMap( - K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1 + K, + distortion_params, + np.eye(3), + newK, + (image.shape[1], image.shape[0]), + cv2.CV_32FC1, ) # and then remap: - image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT) + image = cv2.remap( + image, + map1, + map2, + interpolation=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + ) if "mask" in data: mask = data["mask"].numpy() mask = mask.astype(np.uint8) * 255 From a10c777cff9873af60f59645f4ae9e60aaa9cb2b Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 21:42:16 -0800 Subject: [PATCH 34/38] formatted --- .../datamanagers/full_images_datamanager.py | 21 ++----------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 7158d72eb6..a08a3bbf9f 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -26,20 +26,7 @@ from dataclasses import dataclass, field from functools import cached_property from pathlib import Path -from typing import ( - Dict, - ForwardRef, - Generic, - List, - Literal, - Optional, - Tuple, - Type, - Union, - cast, - get_args, - get_origin, -) +from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin import cv2 import numpy as np @@ -49,11 +36,7 @@ from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion -from nerfstudio.data.datamanagers.base_datamanager import ( - DataManager, - DataManagerConfig, - TDataset, -) +from nerfstudio.data.datamanagers.base_datamanager import DataManager, DataManagerConfig, TDataset from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset From cdd1d1507e78c92fdf0c192a2b2739950dec7d7c Mon Sep 17 00:00:00 2001 From: Ethan Weber Date: Fri, 19 Jan 2024 19:51:54 +0000 Subject: [PATCH 35/38] changed glob variable name --- nerfstudio/process_data/process_data_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index 91132fe4ac..40381b2173 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -68,8 +68,8 @@ def list_images(data: Path, recursive: bool = False) 
-> List[Path]: Paths to images contained in the directory """ allowed_exts = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + ALLOWED_RAW_EXTS - glob = "**/[!.]*" if recursive else "[!.]*" - image_paths = sorted([p for p in data.glob(glob) if p.suffix.lower() in allowed_exts]) + glob_str = "**/[!.]*" if recursive else "[!.]*" + image_paths = sorted([p for p in data.glob(glob_str) if p.suffix.lower() in allowed_exts]) return image_paths From 40c355307c7e73f89a590d062f54125965c28678 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Fri, 19 Jan 2024 17:04:48 -0800 Subject: [PATCH 36/38] Revert "Remove legacy viewer from GitHub actions (#2798)" This reverts commit 70d83d42011e5956b6e59eb313254a2fad67b37f. --- .github/workflows/viewer_build_deploy.yml | 97 +++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 .github/workflows/viewer_build_deploy.yml diff --git a/.github/workflows/viewer_build_deploy.yml b/.github/workflows/viewer_build_deploy.yml new file mode 100644 index 0000000000..da65c20ea1 --- /dev/null +++ b/.github/workflows/viewer_build_deploy.yml @@ -0,0 +1,97 @@ +name: Viewer Build and Deploy. + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + name: Build + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./nerfstudio/viewer_legacy/app + steps: + - name: Checkout code + uses: actions/checkout@master + + - name: Install Node.js + uses: actions/setup-node@v3 + with: + node-version: 17.8.0 + cache: 'yarn' + cache-dependency-path: ./nerfstudio/viewer_legacy/app/yarn.lock + + - name: Install packages + run: yarn install + + - name: Build project + run: CI=false yarn build + + - name: Upload production-ready build files + uses: actions/upload-artifact@v2 + with: + name: production-files + path: ./nerfstudio/viewer_legacy/app/build + + deploy: + name: Deploy + needs: build + runs-on: ubuntu-latest + + env: + SSH_KEY: ${{secrets.SSH_KEY}} + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.8.12 + uses: actions/setup-python@v4 + with: + python-version: '3.8.12' + + - name: Install dependencies + run: | + pip install -r ./nerfstudio/viewer_legacy/app/requirements.txt + + - name: Download artifact + uses: actions/download-artifact@v2 + with: + name: production-files + path: ./nerfstudio/viewer_legacy/app/build + + - name: Get branch name (merge) + if: github.event_name != 'pull_request' + shell: bash + run: echo "BRANCH_NAME=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV + + - name: Get branch name (pull request) + if: github.event_name == 'pull_request' + shell: bash + run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV + + # TODO: detect file or scheme changes of the viewer_legacy and only + # increment the version.txt file when there is a change. + # Update the version.txt code and push to master when things change. 
+ # https://github.com/marketplace/actions/changed-files + # - name: Run changed-files with defaults on the dir1 + # id: changed-files-for-dir1 + # uses: tj-actions/changed-files@v29.0.3 + # with: + # path: nerfstudio/viewer_legacy/app + + # - name: List all added files in dir1 + # run: | + # for file in ${{ steps.changed-files-for-dir1.outputs.modified_files }}; do + # echo "$file was modified" + # done + + - run: | + python ./nerfstudio/viewer_legacy/app/run_deploy.py \ + --branch-name ${{ env.BRANCH_NAME }} \ + --ssh-key-string "$SSH_KEY" \ + --local-folder ./nerfstudio/viewer_legacy/app/build \ + --package-json-filename ./nerfstudio/viewer_legacy/app/package.json \ + --increment-version "False" + - run: cat ~/.ssh/config From 2ed1a71f2a2b50c23e96c3d2a1773b5979794b13 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Wed, 31 Jan 2024 10:02:36 -0800 Subject: [PATCH 37/38] replace alpha depth logic with torch.where for differentiability --- nerfstudio/models/splatfacto.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/models/splatfacto.py b/nerfstudio/models/splatfacto.py index 84000e73c5..b23eabe002 100644 --- a/nerfstudio/models/splatfacto.py +++ b/nerfstudio/models/splatfacto.py @@ -779,8 +779,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: W, background=torch.zeros(3, device=self.device), )[..., 0:1] # type: ignore - depth_im[alpha > 0] = depth_im[alpha > 0] / alpha[alpha > 0] - depth_im[alpha == 0] = 1000 + depth_im = torch.where(alpha > 0, depth_im / alpha, depth_im.detach().max()) return {"rgb": rgb, "depth": depth_im, "accumulation": alpha} # type: ignore From 830f20bfa7b41bfde7adc562f6635ae1492e06b7 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Wed, 31 Jan 2024 10:05:16 -0800 Subject: [PATCH 38/38] rm weird file --- .github/workflows/viewer_build_deploy.yml | 97 ----------------------- 1 file changed, 97 deletions(-) delete mode 100644 .github/workflows/viewer_build_deploy.yml diff --git a/.github/workflows/viewer_build_deploy.yml b/.github/workflows/viewer_build_deploy.yml deleted file mode 100644 index da65c20ea1..0000000000 --- a/.github/workflows/viewer_build_deploy.yml +++ /dev/null @@ -1,97 +0,0 @@ -name: Viewer Build and Deploy. 
- -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - build: - name: Build - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./nerfstudio/viewer_legacy/app - steps: - - name: Checkout code - uses: actions/checkout@master - - - name: Install Node.js - uses: actions/setup-node@v3 - with: - node-version: 17.8.0 - cache: 'yarn' - cache-dependency-path: ./nerfstudio/viewer_legacy/app/yarn.lock - - - name: Install packages - run: yarn install - - - name: Build project - run: CI=false yarn build - - - name: Upload production-ready build files - uses: actions/upload-artifact@v2 - with: - name: production-files - path: ./nerfstudio/viewer_legacy/app/build - - deploy: - name: Deploy - needs: build - runs-on: ubuntu-latest - - env: - SSH_KEY: ${{secrets.SSH_KEY}} - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8.12 - uses: actions/setup-python@v4 - with: - python-version: '3.8.12' - - - name: Install dependencies - run: | - pip install -r ./nerfstudio/viewer_legacy/app/requirements.txt - - - name: Download artifact - uses: actions/download-artifact@v2 - with: - name: production-files - path: ./nerfstudio/viewer_legacy/app/build - - - name: Get branch name (merge) - if: github.event_name != 'pull_request' - shell: bash - run: echo "BRANCH_NAME=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV - - - name: Get branch name (pull request) - if: github.event_name == 'pull_request' - shell: bash - run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV - - # TODO: detect file or scheme changes of the viewer_legacy and only - # increment the version.txt file when there is a change. - # Update the version.txt code and push to master when things change. - # https://github.com/marketplace/actions/changed-files - # - name: Run changed-files with defaults on the dir1 - # id: changed-files-for-dir1 - # uses: tj-actions/changed-files@v29.0.3 - # with: - # path: nerfstudio/viewer_legacy/app - - # - name: List all added files in dir1 - # run: | - # for file in ${{ steps.changed-files-for-dir1.outputs.modified_files }}; do - # echo "$file was modified" - # done - - - run: | - python ./nerfstudio/viewer_legacy/app/run_deploy.py \ - --branch-name ${{ env.BRANCH_NAME }} \ - --ssh-key-string "$SSH_KEY" \ - --local-folder ./nerfstudio/viewer_legacy/app/build \ - --package-json-filename ./nerfstudio/viewer_legacy/app/package.json \ - --increment-version "False" - - run: cat ~/.ssh/config
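For readers following the depth change in PATCH 37: below is a minimal, self-contained sketch (not part of the patch series) contrasting the removed in-place masked writes with the functional `torch.where` form, using toy tensors in place of rasterizer outputs. One deviation is flagged inline: the `safe_alpha` guard is an addition for this sketch, since `torch.where` evaluates both branches and a division by an exact zero in the untaken branch would poison the toy backward pass with nans; the patch itself divides by `alpha` directly.

import torch

# Toy stand-ins for the rasterizer outputs: per-pixel accumulated opacity
# and an unnormalized depth map.
alpha = torch.tensor([[0.0], [0.5], [1.0]])
raw_depth = torch.tensor([[0.0], [2.0], [3.0]], requires_grad=True)

# Removed pattern: in-place masked assignment. On a leaf tensor that
# requires grad (as here) the first write raises a RuntimeError, and even
# on non-leaf tensors in-place mutation can invalidate values autograd
# saved for the backward pass.
#   raw_depth[alpha > 0] = raw_depth[alpha > 0] / alpha[alpha > 0]
#   raw_depth[alpha == 0] = 1000

# New pattern: build the normalized depth functionally. Empty pixels take a
# detached fill value (the current max depth), so they receive zero gradient.
safe_alpha = torch.where(alpha > 0, alpha, torch.ones_like(alpha))  # sketch-only nan guard
depth = torch.where(alpha > 0, raw_depth / safe_alpha, raw_depth.detach().max())

depth.sum().backward()
print(raw_depth.grad)  # tensor([[0.], [2.], [1.]]): 1/alpha where alpha > 0, else 0

Gradients flow only through covered pixels, which matches the differentiability motivation stated in the commit message.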