From 70ade42e46f3dcf0e89e4efc445650f6d6525673 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 17:11:36 -0700 Subject: [PATCH 01/38] fix jittering in markdown in viewer beta --- nerfstudio/viewer_beta/control_panel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index 07f556dd46..d93d335468 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -270,10 +270,9 @@ def update_step(self, step): step: the train step to set the model to """ with self.viser_server.atomic(), self.stat_folder: - # TODO change to a .value call instead of remove() and add, this makes it jittery with self.viser_server.atomic(): self.markdown.remove() - self.markdown = self.viser_server.add_gui_markdown(f"Step: {step}") + self.markdown.content = f"Step: {step}" def update_output_options(self, new_options: List[str]): """ From 622342cdceaa2b01ffc7518c2bf8542c687f5a23 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Tue, 10 Oct 2023 17:12:21 -0700 Subject: [PATCH 02/38] Revert "fix jittering in markdown in viewer beta" This reverts commit 70ade42e46f3dcf0e89e4efc445650f6d6525673. --- nerfstudio/viewer_beta/control_panel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nerfstudio/viewer_beta/control_panel.py b/nerfstudio/viewer_beta/control_panel.py index d93d335468..07f556dd46 100644 --- a/nerfstudio/viewer_beta/control_panel.py +++ b/nerfstudio/viewer_beta/control_panel.py @@ -270,9 +270,10 @@ def update_step(self, step): step: the train step to set the model to """ with self.viser_server.atomic(), self.stat_folder: + # TODO change to a .value call instead of remove() and add, this makes it jittery with self.viser_server.atomic(): self.markdown.remove() - self.markdown.content = f"Step: {step}" + self.markdown = self.viser_server.add_gui_markdown(f"Step: {step}") def update_output_options(self, new_options: List[str]): """ From 8a08740aedeac29d2ca168b900f79c2235275864 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Sun, 29 Oct 2023 16:31:59 -0700 Subject: [PATCH 03/38] print correctly formatted url in banner for viewer beta --- nerfstudio/viewer_beta/viewer.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/nerfstudio/viewer_beta/viewer.py b/nerfstudio/viewer_beta/viewer.py index ae8ce3ae77..2ba3693b83 100644 --- a/nerfstudio/viewer_beta/viewer.py +++ b/nerfstudio/viewer_beta/viewer.py @@ -95,8 +95,6 @@ def __init__( websocket_port = self.config.websocket_port self.log_filename.parent.mkdir(exist_ok=True) - self.viewer_url = viewer_utils.get_viewer_url(websocket_port) - # viewer specific variables self.output_type_changed = True self.output_split_type_changed = True @@ -106,6 +104,19 @@ def __init__( self.last_move_time = 0 self.viser_server = viser.ViserServer(host=config.websocket_host, port=websocket_port, share=share) + # Set the name of the URL either to the share link if available, or the localhost + if share: + assert self.viser_server._share_tunnel is not None + while self.viser_server._share_tunnel._shared_state["status"] == "connecting": + # wait for connection before grabbing URL + time.sleep(0.01) + url_maybe = self.viser_server._share_tunnel.get_url() + if url_maybe is not None: + self.viewer_url = url_maybe + else: + self.viewer_url = f"http://{config.websocket_host}:{websocket_port}" + else: + self.viewer_url = f"http://{config.websocket_host}:{websocket_port}" buttons = ( 
viser.theme.TitlebarButton( text="Getting Started", From 09cc1d12a256377dab6c7c8fb532585fdf5dd45b Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 20:07:57 -0800 Subject: [PATCH 04/38] fix the bug when camera.distortion_params is None --- nerfstudio/data/datamanagers/full_images_datamanager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 35837f05a1..7d401e1ea3 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -129,6 +129,7 @@ def cache_images(self, cache_images_option): camera = self.train_dataset.cameras[i].reshape(()) K = camera.get_intrinsics_matrices().numpy() if camera.distortion_params is None: + cached_train.append(data) continue distortion_params = camera.distortion_params.numpy() image = data["image"].numpy() @@ -203,6 +204,7 @@ def cache_images(self, cache_images_option): camera = self.eval_dataset.cameras[i].reshape(()) K = camera.get_intrinsics_matrices().numpy() if camera.distortion_params is None: + cached_eval.append(data) continue distortion_params = camera.distortion_params.numpy() image = data["image"].numpy() From a4e640b274ed057a3754d596114d4fa839ea840f Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 22:37:13 -0800 Subject: [PATCH 05/38] Handle background color override when using blender. --- .../data/datamanagers/full_images_datamanager.py | 4 ++-- nerfstudio/data/dataparsers/blender_dataparser.py | 11 ++++++----- nerfstudio/model_components/renderers.py | 3 +-- nerfstudio/models/gaussian_splatting.py | 12 ++++++------ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 7d401e1ea3..333a8d4c79 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -60,7 +60,7 @@ class FullImageDatamanagerConfig(DataManagerConfig): eval_image_indices: Optional[Tuple[int, ...]] = (0,) """Specifies the image indices to use during eval; if None, uses all.""" cache_images: Literal["no-cache", "cpu", "gpu"] = "cpu" - """Whether to cache images in memory. If "numpy", caches as numpy arrays, if "torch", caches as torch tensors.""" + """Whether to cache images in memory. 
If "gpu", images are cached on the GPU.""" class FullImageDatamanager(DataManager, Generic[TDataset]): @@ -104,7 +104,7 @@ def __init__( self.train_dataset = self.create_train_dataset() self.eval_dataset = self.create_eval_dataset() if len(self.train_dataset) > 500 and self.config.cache_images == "gpu": - CONSOLE.print("Train dataset has over 500 images, overriding cach_images to cpu", style="bold yellow") + CONSOLE.print("Train dataset has over 500 images, overriding cache_images to cpu", style="bold yellow") self.config.cache_images = "cpu" self.cached_train, self.cached_eval = self.cache_images(self.config.cache_images) self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device diff --git a/nerfstudio/data/dataparsers/blender_dataparser.py b/nerfstudio/data/dataparsers/blender_dataparser.py index 9d6524d4e0..0a207c9694 100644 --- a/nerfstudio/data/dataparsers/blender_dataparser.py +++ b/nerfstudio/data/dataparsers/blender_dataparser.py @@ -32,6 +32,7 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.utils.colors import get_color from nerfstudio.utils.io import load_from_json +from nerfstudio.model_components.renderers import background_color_override_context @dataclass @@ -61,13 +62,13 @@ def __init__(self, config: BlenderDataParserConfig): self.data: Path = config.data self.scale_factor: float = config.scale_factor self.alpha_color = config.alpha_color - - def _generate_dataparser_outputs(self, split="train"): if self.alpha_color is not None: - alpha_color_tensor = get_color(self.alpha_color) + self.alpha_color_tensor = get_color(self.alpha_color) + background_color_override_context(self.alpha_color_tensor) else: - alpha_color_tensor = None + self.alpha_color_tensor = None + def _generate_dataparser_outputs(self, split="train"): meta = load_from_json(self.data / f"transforms_{split}.json") image_filenames = [] poses = [] @@ -102,7 +103,7 @@ def _generate_dataparser_outputs(self, split="train"): dataparser_outputs = DataparserOutputs( image_filenames=image_filenames, cameras=cameras, - alpha_color=alpha_color_tensor, + alpha_color=self.alpha_color_tensor, scene_box=scene_box, dataparser_scale=self.scale_factor, ) diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index 1fde0d693c..32329b2aca 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -50,8 +50,7 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non old_background_color = BACKGROUND_COLOR_OVERRIDE try: BACKGROUND_COLOR_OVERRIDE = mode - yield - finally: + except: BACKGROUND_COLOR_OVERRIDE = old_background_color diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index 91686b44a2..ce35cd20a1 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -533,14 +533,14 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: if self.training: # currently relies on the branch vickie/camera-grads self.camera_optimizer.apply_to_camera(camera) - if self.training: + # get the background color + if renderers.BACKGROUND_COLOR_OVERRIDE is not None: + background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device) + elif self.training: background = torch.rand(3, device=self.device) else: - # logic for setting the background of the scene - if renderers.BACKGROUND_COLOR_OVERRIDE is not None: - background = renderers.BACKGROUND_COLOR_OVERRIDE - else: - background = 
self.back_color.to(self.device) + background = self.back_color.to(self.device) + if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: From bd07a71977caf748d230ed9f8f97112eea8ae158 Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 22:44:44 -0800 Subject: [PATCH 06/38] fix bare except --- nerfstudio/model_components/renderers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index 32329b2aca..3492c94681 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -50,8 +50,10 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non old_background_color = BACKGROUND_COLOR_OVERRIDE try: BACKGROUND_COLOR_OVERRIDE = mode - except: + yield + except Exception: BACKGROUND_COLOR_OVERRIDE = old_background_color + raise (Exception) class RGBRenderer(nn.Module): From 2fcd55617c8e6b272c0e6dd06fa64f5e4c908188 Mon Sep 17 00:00:00 2001 From: xucr Date: Sun, 24 Dec 2023 22:53:29 -0800 Subject: [PATCH 07/38] format --- nerfstudio/models/gaussian_splatting.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index ce35cd20a1..850105543a 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -536,11 +536,12 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: # get the background color if renderers.BACKGROUND_COLOR_OVERRIDE is not None: background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device) - elif self.training: - background = torch.rand(3, device=self.device) else: - background = self.back_color.to(self.device) - + if self.training: + background = torch.rand(3, device=self.device) + else: + background = self.back_color.to(self.device) + if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: From 00d10a170b267ab3a546349f6a972ec56e0b4739 Mon Sep 17 00:00:00 2001 From: xucr Date: Mon, 25 Dec 2023 00:50:54 -0800 Subject: [PATCH 08/38] Update background color override in Blender dataparser --- nerfstudio/data/dataparsers/blender_dataparser.py | 4 ++-- nerfstudio/model_components/renderers.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/nerfstudio/data/dataparsers/blender_dataparser.py b/nerfstudio/data/dataparsers/blender_dataparser.py index 0a207c9694..d8172579c7 100644 --- a/nerfstudio/data/dataparsers/blender_dataparser.py +++ b/nerfstudio/data/dataparsers/blender_dataparser.py @@ -32,7 +32,7 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.utils.colors import get_color from nerfstudio.utils.io import load_from_json -from nerfstudio.model_components.renderers import background_color_override_context +from nerfstudio.model_components.renderers import force_background_color_override @dataclass @@ -64,7 +64,7 @@ def __init__(self, config: BlenderDataParserConfig): self.alpha_color = config.alpha_color if self.alpha_color is not None: self.alpha_color_tensor = get_color(self.alpha_color) - background_color_override_context(self.alpha_color_tensor) + force_background_color_override(self.alpha_color_tensor) else: self.alpha_color_tensor = None diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index 
3492c94681..bbcca06624 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -51,9 +51,14 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non try: BACKGROUND_COLOR_OVERRIDE = mode yield - except Exception: + finally: BACKGROUND_COLOR_OVERRIDE = old_background_color - raise (Exception) + + +def force_background_color_override(mode: Float[Tensor, "3"]) -> None: + """Force background color override.""" + global BACKGROUND_COLOR_OVERRIDE + BACKGROUND_COLOR_OVERRIDE = mode class RGBRenderer(nn.Module): From 2158946580a43ca40645c634ad52b93c665facf4 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 7 Dec 2023 17:05:28 -0500 Subject: [PATCH 09/38] Add ability to download EyefulTower dataset --- nerfstudio/scripts/downloads/download_data.py | 118 ++++++++++++++++-- pyproject.toml | 4 +- 2 files changed, 110 insertions(+), 12 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 9560beba8d..1be76d6c7b 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -22,15 +22,16 @@ import zipfile from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Tuple, Union +import awscli.clidriver import gdown import torch import tyro -from nerfstudio.process_data import process_data_utils from typing_extensions import Annotated from nerfstudio.configs.base_config import PrintableConfig +from nerfstudio.process_data import process_data_utils from nerfstudio.utils import install_checks from nerfstudio.utils.scripts import run_command @@ -545,6 +546,108 @@ def download(self, save_dir: Path) -> None: shutil.rmtree(target_path / "val") +eyefultower_downloads = [ + "all", + "apartment", + "kitchen", + "office1a", + "office1b", + "office2", + "office_view1", + "office_view2", + "riverview", + "seating_area", + "table", + "workshop", +] + +eyefultower_resolutions = { + "all": None, + "jpeg_2k": "images-jpeg-2k", + "jpeg_4k": "images-jpeg-4k", + "jpeg_8k": "images-jpeg", + "exr_2k": "images-2k", +} + +if TYPE_CHECKING: + EyefulTowerCaptureName = str + EyefulTowerResolution = str +else: + EyefulTowerCaptureName = tyro.extras.literal_type_from_choices(eyefultower_downloads) + EyefulTowerResolution = tyro.extras.literal_type_from_choices(eyefultower_resolutions.keys()) + + +@dataclass +class EyefulTowerDownload(DatasetDownload): + """Download the EyefulTower dataset. + + Use the --help flag with the `eyefultower` subcommand to see all available datasets. + Find more information about the dataset at https://github.com/facebookresearch/EyefulTower. + """ + + capture_name: Tuple[EyefulTowerCaptureName, ...] = () + resolution_name: Tuple[EyefulTowerResolution, ...] 
= () + + def download(self, save_dir: Path): + if len(self.capture_name) == 0: + self.capture_name = ("seating_area",) + print( + f"No capture specified, using {self.capture_name} by default.", + "Add `--help` to this command to see all available captures.", + ) + + if len(self.resolution_name) == 0: + self.resolution_name = ("jpeg_2k",) + print( + f"No resolution specified, using {self.resolution_name} by default.", + "Add `--help` to this command to see all available resolutions.", + ) + + captures = set() + for capture in self.capture_name: + if capture == "all": + captures.update([c for c in eyefultower_downloads if c != "all"]) + else: + captures.add(capture) + captures = sorted(captures) + if len(captures) == 0: + print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.") + + resolutions = set() + for resolution in self.resolution_name: + if resolution == "all": + resolutions.update([r for r in eyefultower_resolutions.keys() if r != "all"]) + else: + resolutions.add(resolution) + resolutions = sorted(resolutions) + if len(resolutions) == 0: + print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.") + + driver = awscli.clidriver.create_clidriver() + + for i, capture in enumerate(captures): + base_url = f"s3://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15/EyefulTower/{capture}/" + output_path = save_dir / "eyefultower" / capture + includes = [] + for resolution in resolutions: + includes.extend(["--include", f"{eyefultower_resolutions[resolution]}/*"]) + command = ( + ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"] + + includes + + [base_url, str(output_path)] + ) + print( + f"[Capture {i+1: >2d}/{len(captures)}]:", + f"Downloading resolutions {resolutions} from EyefulTower capture '{capture}'", + f"to '{output_path.resolve()}' with command `aws {' '.join(command)}`", + "...", + end=" ", + flush=True, + ) + driver.main(command) + print("done!") + + Commands = Union[ Annotated[BlenderDownload, tyro.conf.subcommand(name="blender")], Annotated[Sitcoms3DDownload, tyro.conf.subcommand(name="sitcoms3d")], @@ -555,6 +658,7 @@ def download(self, save_dir: Path) -> None: Annotated[SDFstudioDemoDownload, tyro.conf.subcommand(name="sdfstudio")], Annotated[NeRFOSRDownload, tyro.conf.subcommand(name="nerfosr")], Annotated[Mill19Download, tyro.conf.subcommand(name="mill19")], + Annotated[EyefulTowerDownload, tyro.conf.subcommand(name="eyefultower")], ] @@ -562,15 +666,7 @@ def main( dataset: DatasetDownload, ): """Script to download existing datasets. - We currently support the following datasets: - - nerfstudio: Growing collection of real-world scenes. Use the `capture_name` argument to specify - which capture to download. - - blender: Blender synthetic scenes realeased with NeRF. - - sitcoms3d: Friends TV show scenes. - - record3d: Record3d dataset. - - dnerf: D-NeRF dataset. - - phototourism: PhotoTourism dataset. Use the `capture_name` argument to specify which capture to download. - - mill19: Mill 19 dataset. Use the `capture_name` argument to specify which capture to download. + We currently support the datasets listed above in the Commands. Args: dataset: The dataset to download (from). 
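For context before the packaging changes below, here is a minimal sketch of driving the `eyefultower` subcommand this patch adds. It assumes the `ns-download-data` entry point that wraps this script and the `EyefulTowerDownload` dataclass from the diff above; the capture and resolution values are illustrative, not required defaults.

```python
# Roughly equivalent to the CLI invocation:
#   ns-download-data eyefultower --capture-name riverview --resolution-name jpeg_2k
# Assumes EyefulTowerDownload as defined in this patch.
from pathlib import Path

from nerfstudio.scripts.downloads.download_data import EyefulTowerDownload

downloader = EyefulTowerDownload(
    capture_name=("riverview",),   # any entry from eyefultower_downloads
    resolution_name=("jpeg_2k",),  # any key of eyefultower_resolutions
)
# Internally this shells out to `aws s3 sync --no-sign-request` through
# awscli's clidriver, so no AWS credentials are needed.
downloader.download(Path("data"))
```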
diff --git a/pyproject.toml b/pyproject.toml index cb432a1a6b..85611102a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ classifiers = [ "Programming Language :: Python", ] dependencies = [ + "awscli>=1.31.10", "appdirs>=1.4", "av>=9.2.0", "comet_ml>=3.33.8", @@ -92,7 +93,8 @@ dev = [ "typeguard==2.13.3", "ruff==0.0.267", "sshconf==0.2.5", - "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions + # TODO(1480) enable when pycolmap windows wheels are available + # "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions "diffusers==0.16.1", "opencv-stubs==0.0.7", "transformers==4.29.2", From ba5ce12d4d87710c72db6099712c486c85290f41 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Fri, 8 Dec 2023 14:47:28 -0500 Subject: [PATCH 10/38] wip before I copy linning's stuff in --- nerfstudio/process_data/metashape_utils.py | 2 +- nerfstudio/process_data/process_data_utils.py | 4 ++-- nerfstudio/scripts/process_data.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nerfstudio/process_data/metashape_utils.py b/nerfstudio/process_data/metashape_utils.py index 8bea8707cb..fad2445ed0 100644 --- a/nerfstudio/process_data/metashape_utils.py +++ b/nerfstudio/process_data/metashape_utils.py @@ -17,7 +17,7 @@ import json import xml.etree.ElementTree as ET from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Optional import numpy as np diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index c1946305b3..76239d0205 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -61,12 +61,12 @@ def list_images(data: Path) -> List[Path]: """Lists all supported images in a directory Args: - data: Path to the directory of images. + data: Path to the directory of images. Nested folders are searched as well. 
Returns: Paths to images contained in the directory """ allowed_exts = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + ALLOWED_RAW_EXTS - image_paths = sorted([p for p in data.glob("[!.]*") if p.suffix.lower() in allowed_exts]) + image_paths = sorted([p for p in data.glob("**/[!.]*") if p.suffix.lower() in allowed_exts]) return image_paths diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index 2c2cd7a381..bbe3b635af 100644 --- a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -20,7 +20,7 @@ import zipfile from dataclasses import dataclass from pathlib import Path -from typing import Union +from typing import Optional, Union import numpy as np import tyro @@ -28,11 +28,11 @@ from nerfstudio.process_data import ( metashape_utils, + odm_utils, polycam_utils, process_data_utils, realitycapture_utils, record3d_utils, - odm_utils, ) from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import BaseConverterToNerfstudioDataset from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset From f95b4eb99728fb209f5a128334dc8d525721919f Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Fri, 8 Dec 2023 19:15:29 -0500 Subject: [PATCH 11/38] Generate per-resolution cameras.xml --- nerfstudio/scripts/downloads/download_data.py | 104 ++++++++++++++++-- 1 file changed, 94 insertions(+), 10 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 1be76d6c7b..aff58e97d1 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -15,10 +15,12 @@ """Download datasets and specific captures from the datasets.""" from __future__ import annotations +import copy import json import os import shutil import tarfile +import xml.etree.ElementTree as ET import zipfile from dataclasses import dataclass from pathlib import Path @@ -561,12 +563,20 @@ def download(self, save_dir: Path) -> None: "workshop", ] + +@dataclass +class EyefulTowerResolutionMetadata: + folder_name: str + width: int + height: int + + eyefultower_resolutions = { "all": None, - "jpeg_2k": "images-jpeg-2k", - "jpeg_4k": "images-jpeg-4k", - "jpeg_8k": "images-jpeg", - "exr_2k": "images-2k", + "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048), + "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096), + "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660), + "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048), } if TYPE_CHECKING: @@ -588,9 +598,62 @@ class EyefulTowerDownload(DatasetDownload): capture_name: Tuple[EyefulTowerCaptureName, ...] = () resolution_name: Tuple[EyefulTowerResolution, ...] 
= () + @staticmethod + def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int): + transformed = copy.deepcopy(xml_tree) + + root = transformed.getroot() + assert len(root) == 1 + chunk = root[0] + sensors = chunk.find("sensors") + assert sensors is not None + + for sensor in sensors: + resolution = sensor.find("resolution") + assert resolution is not None, "Resolution not found in EyefulTower camera.xml" + original_width = int(resolution.get("width")) # type: ignore + original_height = int(resolution.get("height")) # type: ignore + + if original_width > original_height: + target_width, target_height = max(target_width, target_height), min(target_width, target_height) + else: + target_height, target_width = max(target_width, target_height), min(target_width, target_height) + + resolution.set("width", str(target_width)) + resolution.set("height", str(target_height)) + + calib = sensor.find("calibration") + assert calib is not None, "Calibration not found in EyefulTower sensor" + + calib_resolution = calib.find("resolution") + assert calib_resolution is not None + calib_resolution.set("width", str(target_width)) + calib_resolution.set("height", str(target_height)) + + # Compute each scale individually and average for better rounding + x_scale = target_width / original_width + y_scale = target_height / original_height + scale = (x_scale + y_scale) / 2.0 + + f = calib.find("f") + assert f is not None and f.text is not None, "f not found in calib" + f.text = str(float(f.text) * scale) + + cx = calib.find("cx") + assert cx is not None and cx.text is not None, "cx not found in calib" + cx.text = str(float(cx.text) * x_scale) + + cy = calib.find("cy") + assert cy is not None and cy.text is not None, "cy not found in calib" + cy.text = str(float(cy.text) * y_scale) + + # TODO: Maybe update pixel_width / pixel_height / focal_length / layer_index? + + return transformed + def download(self, save_dir: Path): if len(self.capture_name) == 0: - self.capture_name = ("seating_area",) + self.capture_name = ("riverview",) print( f"No capture specified, using {self.capture_name} by default.", "Add `--help` to this command to see all available captures.", @@ -630,23 +693,44 @@ def download(self, save_dir: Path): output_path = save_dir / "eyefultower" / capture includes = [] for resolution in resolutions: - includes.extend(["--include", f"{eyefultower_resolutions[resolution]}/*"]) + includes.extend(["--include", f"{eyefultower_resolutions[resolution].folder_name}/*"]) command = ( ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"] + includes + [base_url, str(output_path)] ) + print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'") print( - f"[Capture {i+1: >2d}/{len(captures)}]:", - f"Downloading resolutions {resolutions} from EyefulTower capture '{capture}'", - f"to '{output_path.resolve()}' with command `aws {' '.join(command)}`", - "...", + f" Downloading resolutions {resolutions}", + f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...", end=" ", flush=True, ) driver.main(command) print("done!") + # After downloading, we'll insert an appropriate cameras.xml file into each directory + # It's quick enough that we can just redo it every time this is called, regardless + # of whether new data is downloaded. + xml_input_path = output_path / "cameras.xml" + if not xml_input_path.exists: + print(" WARNING: cameras.xml not found. 
Scaled cameras.xml will not be generated.") + continue + + tree = ET.parse(output_path / "cameras.xml") + + for resolution in resolutions: + metadata = eyefultower_resolutions[resolution] + xml_output_path = output_path / metadata.folder_name / "cameras.xml" + print( + f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", + end=" ", + flush=True, + ) + scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height) + scaled_tree.write(xml_output_path) + print("done!") + Commands = Union[ Annotated[BlenderDownload, tyro.conf.subcommand(name="blender")], From f811bd244bfdc10d5d5a8c92fbe6f534a87aabc7 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Tue, 12 Dec 2023 16:41:02 -0500 Subject: [PATCH 12/38] Generate transforms.json at download --- nerfstudio/scripts/downloads/download_data.py | 154 +++++++++++++++--- 1 file changed, 135 insertions(+), 19 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index aff58e97d1..2ccda92296 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -28,6 +28,7 @@ import awscli.clidriver import gdown +import numpy as np import torch import tyro from typing_extensions import Annotated @@ -569,14 +570,15 @@ class EyefulTowerResolutionMetadata: folder_name: str width: int height: int + extension: str eyefultower_resolutions = { "all": None, - "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048), - "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096), - "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660), - "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048), + "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"), + "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"), + "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), + "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"), } if TYPE_CHECKING: @@ -651,6 +653,89 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe return transformed + def convert_cameras_to_nerfstudio_transforms( + self, cameras: dict, target_width: int, target_height: int, extension: str + ): + output = {} + + distortion_models = [c["distortionModel"] for c in cameras["KRT"]] + distortion_model = list(set(distortion_models)) + assert len(distortion_model) == 1 + distortion_model = distortion_model[0] + if distortion_model == "RadialAndTangential": + output["camera_model"] = "OPENCV" + elif distortion_model == "Fisheye": + output["camera_model"] = "OPENCV_FISHEYE" + else: + raise NotImplementedError(f"Camera model {distortion_model} not implemented") + + frames = [] + for camera in cameras["KRT"]: + frame = {} + # TODO EXR + frame["file_path"] = camera["cameraId"] + f".{extension}" + + original_width = camera["width"] + original_height = camera["height"] + if original_width > original_height: + target_width, target_height = max(target_width, target_height), min(target_width, target_height) + else: + target_height, target_width = max(target_width, target_height), min(target_width, target_height) + x_scale = target_width / original_width + y_scale = target_height / original_height + + frame["w"] = target_width + frame["h"] = target_height + K = np.array(camera["K"]).T # Data stored as column-major + frame["fl_x"] = K[0][0] * x_scale + frame["fl_y"] = K[1][1] * y_scale + frame["cx"] = 
K[0][2] * x_scale + frame["cy"] = K[1][2] * y_scale + + if distortion_model == "RadialAndTangential": + # pinhole: [k1, k2, p1, p2, k3] + frame["k1"] = camera["distortion"][0] + frame["k2"] = camera["distortion"][1] + frame["k3"] = camera["distortion"][4] + frame["k4"] = 0.0 + frame["p1"] = camera["distortion"][2] + frame["p2"] = camera["distortion"][3] + elif distortion_model == "Fisheye": + # fisheye: [k1, k2, k3, _, _, _, p1, p2] + frame["k1"] = camera["distortion"][0] + frame["k2"] = camera["distortion"][1] + frame["k3"] = camera["distortion"][2] + frame["p1"] = camera["distortion"][6] + frame["p2"] = camera["distortion"][7] + else: + raise NotImplementedError("This shouldn't happen") + + T = np.array(camera["T"]).T # Data stored as column-major + T = np.linalg.inv(T) + T = T[[2, 0, 1, 3], :] + T[:, 1:3] *= -1 + frame["transform_matrix"] = T.tolist() + + frames.append(frame) + + frames = sorted(frames, key=lambda f: f["file_path"]) + + output["frames"] = frames + return output + + def subsample_nerfstudio_transforms(self, transforms: dict, n: int): + target = min(len(transforms["frames"]), n) + indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int) + + frames = [] + for i in indices: + frames.append(transforms["frames"][i]) + + output = copy.deepcopy(transforms) + output["frames"] = frames + + return output + def download(self, save_dir: Path): if len(self.capture_name) == 0: self.capture_name = ("riverview",) @@ -715,21 +800,52 @@ def download(self, save_dir: Path): xml_input_path = output_path / "cameras.xml" if not xml_input_path.exists: print(" WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.") - continue - - tree = ET.parse(output_path / "cameras.xml") - - for resolution in resolutions: - metadata = eyefultower_resolutions[resolution] - xml_output_path = output_path / metadata.folder_name / "cameras.xml" - print( - f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", - end=" ", - flush=True, - ) - scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height) - scaled_tree.write(xml_output_path) - print("done!") + else: + tree = ET.parse(output_path / "cameras.xml") + + for resolution in resolutions: + metadata = eyefultower_resolutions[resolution] + xml_output_path = output_path / metadata.folder_name / "cameras.xml" + print( + f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", + end=" ", + flush=True, + ) + scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height) + scaled_tree.write(xml_output_path) + print("done!") + + json_input_path = output_path / "cameras.json" + if not json_input_path.exists: + print(" WARNING: cameras.json not found. transforms.json will not be generated.") + else: + with open(json_input_path, "r") as f: + cameras = json.load(f) + + for resolution in resolutions: + metadata = eyefultower_resolutions[resolution] + json_output_path = output_path / metadata.folder_name / "transforms.json" + print( + f" Generating transforms.json for '{resolution}' to {json_output_path.resolve()} ... 
", + end=" ", + flush=True, + ) + transforms = self.convert_cameras_to_nerfstudio_transforms( + cameras, metadata.width, metadata.height, metadata.extension + ) + + with open(json_output_path, "w", encoding="utf8") as f: + json.dump(transforms, f, indent=4) + + for count, name in [ + (300, "transforms_300.json"), + (int(len(cameras["KRT"]) // 2), "transforms_half.json"), + ]: + subsampled = self.subsample_nerfstudio_transforms(transforms, count) + with open(json_output_path.with_name(name), "w", encoding="utf8") as f: + json.dump(subsampled, f, indent=4) + + print("done!") Commands = Union[ From 59e1cf67e6ede298874e54aec6902832fd04dc21 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Wed, 10 Jan 2024 15:08:31 -0500 Subject: [PATCH 13/38] Fix a couple of quotes --- nerfstudio/scripts/downloads/download_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 2ccda92296..776f9809ef 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -807,7 +807,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] xml_output_path = output_path / metadata.folder_name / "cameras.xml" print( - f" Generating cameras.xml for '{resolution}' to {xml_output_path.resolve()} ... ", + f" Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ", end=" ", flush=True, ) @@ -826,7 +826,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] json_output_path = output_path / metadata.folder_name / "transforms.json" print( - f" Generating transforms.json for '{resolution}' to {json_output_path.resolve()} ... ", + f" Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... 
", end=" ", flush=True, ) From 688c39b750b0c4f7593d10ea0421715a3b5ee32e Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Wed, 10 Jan 2024 16:34:17 -0500 Subject: [PATCH 14/38] Use official EyefulTower splits for train and val --- nerfstudio/scripts/downloads/download_data.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 776f9809ef..1a236b2cbc 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -15,6 +15,7 @@ """Download datasets and specific captures from the datasets.""" from __future__ import annotations +import collections import copy import json import os @@ -654,7 +655,7 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe return transformed def convert_cameras_to_nerfstudio_transforms( - self, cameras: dict, target_width: int, target_height: int, extension: str + self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str ): output = {} @@ -669,11 +670,17 @@ def convert_cameras_to_nerfstudio_transforms( else: raise NotImplementedError(f"Camera model {distortion_model} not implemented") + split_sets = {k: set(v) for k, v in splits.items()} + frames = [] + split_filenames = collections.defaultdict(list) for camera in cameras["KRT"]: frame = {} # TODO EXR frame["file_path"] = camera["cameraId"] + f".{extension}" + for split in split_sets: + if camera["cameraId"] in split_sets[split]: + split_filenames[split].append(frame["file_path"]) original_width = camera["width"] original_height = camera["height"] @@ -721,6 +728,8 @@ def convert_cameras_to_nerfstudio_transforms( frames = sorted(frames, key=lambda f: f["file_path"]) output["frames"] = frames + output["train_filenames"] = split_filenames["train"] + output["val_filenames"] = split_filenames["test"] return output def subsample_nerfstudio_transforms(self, transforms: dict, n: int): @@ -734,6 +743,11 @@ def subsample_nerfstudio_transforms(self, transforms: dict, n: int): output = copy.deepcopy(transforms) output["frames"] = frames + # Remove the unused files from the splits + filenames = {f["file_path"] for f in frames} + for key in ["train_filenames", "val_filenames"]: + output[key] = sorted(list(set(transforms[key]) & filenames)) + return output def download(self, save_dir: Path): @@ -816,12 +830,18 @@ def download(self, save_dir: Path): print("done!") json_input_path = output_path / "cameras.json" + splits_input_path = output_path / "splits.json" if not json_input_path.exists: print(" WARNING: cameras.json not found. transforms.json will not be generated.") + elif not splits_input_path.exists: + print(" WARNING: splits.json not found. 
transforms.json will not be generated.") + else: with open(json_input_path, "r") as f: cameras = json.load(f) + with open(splits_input_path, "r") as f: + splits = json.load(f) + for resolution in resolutions: metadata = eyefultower_resolutions[resolution] json_output_path = output_path / metadata.folder_name / "transforms.json" @@ -831,7 +851,7 @@ def download(self, save_dir: Path): flush=True, ) transforms = self.convert_cameras_to_nerfstudio_transforms( - cameras, metadata.width, metadata.height, metadata.extension + cameras, splits, metadata.width, metadata.height, metadata.extension ) with open(json_output_path, "w", encoding="utf8") as f: From 25b6d58f282fbdf03a5311579841d29d95a0264b Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 11 Jan 2024 14:12:43 -0500 Subject: [PATCH 15/38] Disable projectaria-tools on windows --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 85611102a8..2debde67eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,7 +99,9 @@ dev = [ "opencv-stubs==0.0.7", "transformers==4.29.2", "pyright==1.1.331", - "projectaria_tools[all]>=1.2.0", + # NOTE: Disabling projectaria-tools because it doesn't have prebuilt windows wheels + # Syntax comes from here: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ + "projectaria-tools>=1.3.1; sys_platform != 'win32'", ] # Documentation related packages From 0e7d29e78a01504dd098a5008bd9908f1035935f Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 11 Jan 2024 15:37:20 -0500 Subject: [PATCH 16/38] Fix extra imports --- nerfstudio/process_data/metashape_utils.py | 2 +- nerfstudio/scripts/process_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/process_data/metashape_utils.py b/nerfstudio/process_data/metashape_utils.py index fad2445ed0..8bea8707cb 100644 --- a/nerfstudio/process_data/metashape_utils.py +++ b/nerfstudio/process_data/metashape_utils.py @@ -17,7 +17,7 @@ import json import xml.etree.ElementTree as ET from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List import numpy as np diff --git a/nerfstudio/scripts/process_data.py b/nerfstudio/scripts/process_data.py index bbe3b635af..6890cd933a 100644 --- a/nerfstudio/scripts/process_data.py +++ b/nerfstudio/scripts/process_data.py @@ -20,7 +20,7 @@ import zipfile from dataclasses import dataclass from pathlib import Path -from typing import Optional, Union +from typing import Union import numpy as np import tyro From f811bd244bfdc10d5d5a8c92fbe6f534a87aabc7 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 11 Jan 2024 16:14:59 -0500 Subject: [PATCH 17/38] Add a new nerfacto method tuned for EyefulTower --- nerfstudio/configs/method_configs.py | 64 ++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index 1f1a09edbb..96f4b175f9 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -26,23 +26,16 @@ from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig from nerfstudio.configs.base_config import ViewerConfig from nerfstudio.configs.external_methods import get_external_methods -from nerfstudio.data.datamanagers.base_datamanager import ( - VanillaDataManager, - VanillaDataManagerConfig, -) +from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig +from
nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig -from nerfstudio.data.datamanagers.random_cameras_datamanager import ( - RandomCamerasDataManagerConfig, -) +from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig +from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig -from nerfstudio.data.dataparsers.instant_ngp_dataparser import ( - InstantNGPDataParserConfig, -) +from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig -from nerfstudio.data.dataparsers.phototourism_dataparser import ( - PhototourismDataParserConfig, -) +from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig from nerfstudio.data.datasets.depth_dataset import DepthDataset @@ -62,7 +55,6 @@ from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig from nerfstudio.models.generfacto import GenerfactoModelConfig from nerfstudio.models.instant_ngp import InstantNGPModelConfig -from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig from nerfstudio.models.mipnerf import MipNerfModel from nerfstudio.models.nerfacto import NerfactoModelConfig from nerfstudio.models.neus import NeuSModelConfig @@ -71,13 +63,13 @@ from nerfstudio.models.tensorf import TensoRFModelConfig from nerfstudio.models.vanilla_nerf import NeRFModel, VanillaModelConfig from nerfstudio.pipelines.base_pipeline import VanillaPipelineConfig -from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig from nerfstudio.pipelines.dynamic_batch import DynamicBatchPipelineConfig from nerfstudio.plugins.registry import discover_methods method_configs: Dict[str, TrainerConfig] = {} descriptions = { "nerfacto": "Recommended real-time model tuned for real captures. This model will be continually updated.", + "nerfacto-eyeful-tower": "Variant of nerfacto with settings tuned for EyefulTower dataset scenes.", "depth-nerfacto": "Nerfacto with depth supervision.", "instant-ngp": "Implementation of Instant-NGP. Recommended real-time model for unbounded scenes.", "instant-ngp-bounded": "Implementation of Instant-NGP. 
Recommended for bounded real and synthetic scenes", @@ -218,6 +210,45 @@ vis="viewer", ) +method_configs["nerfacto-eyeful-tower"] = TrainerConfig( + method_name="nerfacto", + steps_per_eval_batch=500, + steps_per_save=2000, + max_num_iterations=100_000, + steps_per_eval_all_images=100_000, + mixed_precision=True, + pipeline=VanillaPipelineConfig( + datamanager=ParallelDataManagerConfig( + dataparser=NerfstudioDataParserConfig(), + train_num_rays_per_batch=12_800, + eval_num_rays_per_batch=4096, + ), + model=NerfactoModelConfig( + eval_num_rays_per_chunk=1 << 15, + camera_optimizer=CameraOptimizerConfig(mode="off"), + max_res=19_912, + log2_hashmap_size=22, + far_plane=100.0, + ), + ), + optimizers={ + "proposal_networks": { + "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), + }, + "fields": { + "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), + }, + "camera_opt": { + "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), + "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), + }, + }, + viewer=ViewerConfig(num_rays_per_chunk=1 << 15), + vis="viewer", +) + method_configs["depth-nerfacto"] = TrainerConfig( method_name="depth-nerfacto", steps_per_eval_batch=500, @@ -306,8 +337,7 @@ viewer=ViewerConfig(num_rays_per_chunk=1 << 12), vis="viewer", ) -# -# + method_configs["mipnerf"] = TrainerConfig( method_name="mipnerf", pipeline=VanillaPipelineConfig( From 8fb0a8b65eb263726693dd485e7adf79b8ca6206 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Fri, 12 Jan 2024 15:50:16 -0500 Subject: [PATCH 18/38] Split eyefultower download into a separate file --- nerfstudio/scripts/downloads/download_data.py | 342 +---------------- nerfstudio/scripts/downloads/eyeful_tower.py | 345 ++++++++++++++++++ nerfstudio/scripts/downloads/utils.py | 32 ++ 3 files changed, 380 insertions(+), 339 deletions(-) create mode 100644 nerfstudio/scripts/downloads/eyeful_tower.py create mode 100644 nerfstudio/scripts/downloads/utils.py diff --git a/nerfstudio/scripts/downloads/download_data.py b/nerfstudio/scripts/downloads/download_data.py index 1a236b2cbc..515ee45a62 100644 --- a/nerfstudio/scripts/downloads/download_data.py +++ b/nerfstudio/scripts/downloads/download_data.py @@ -15,45 +15,27 @@ """Download datasets and specific captures from the datasets.""" from __future__ import annotations -import collections -import copy import json import os import shutil import tarfile -import xml.etree.ElementTree as ET import zipfile from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Tuple, Union +from typing import TYPE_CHECKING, Union -import awscli.clidriver import gdown -import numpy as np import torch import tyro from typing_extensions import Annotated -from nerfstudio.configs.base_config import PrintableConfig from nerfstudio.process_data import process_data_utils +from nerfstudio.scripts.downloads.eyeful_tower import EyefulTowerDownload +from nerfstudio.scripts.downloads.utils import DatasetDownload from nerfstudio.utils import install_checks from nerfstudio.utils.scripts import run_command -@dataclass -class DatasetDownload(PrintableConfig): - """Download a dataset""" - - capture_name = None - - save_dir: Path = Path("data/") - """The directory to save the dataset to""" - - def download(self, save_dir: Path) -> None: - """Download the dataset""" - raise NotImplementedError - - @dataclass 
class BlenderDownload(DatasetDownload): """Download the blender dataset.""" @@ -550,324 +532,6 @@ def download(self, save_dir: Path) -> None: shutil.rmtree(target_path / "val") -eyefultower_downloads = [ - "all", - "apartment", - "kitchen", - "office1a", - "office1b", - "office2", - "office_view1", - "office_view2", - "riverview", - "seating_area", - "table", - "workshop", -] - - -@dataclass -class EyefulTowerResolutionMetadata: - folder_name: str - width: int - height: int - extension: str - - -eyefultower_resolutions = { - "all": None, - "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"), - "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"), - "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), - "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"), -} - -if TYPE_CHECKING: - EyefulTowerCaptureName = str - EyefulTowerResolution = str -else: - EyefulTowerCaptureName = tyro.extras.literal_type_from_choices(eyefultower_downloads) - EyefulTowerResolution = tyro.extras.literal_type_from_choices(eyefultower_resolutions.keys()) - - -@dataclass -class EyefulTowerDownload(DatasetDownload): - """Download the EyefulTower dataset. - - Use the --help flag with the `eyefultower` subcommand to see all available datasets. - Find more information about the dataset at https://github.com/facebookresearch/EyefulTower. - """ - - capture_name: Tuple[EyefulTowerCaptureName, ...] = () - resolution_name: Tuple[EyefulTowerResolution, ...] = () - - @staticmethod - def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int): - transformed = copy.deepcopy(xml_tree) - - root = transformed.getroot() - assert len(root) == 1 - chunk = root[0] - sensors = chunk.find("sensors") - assert sensors is not None - - for sensor in sensors: - resolution = sensor.find("resolution") - assert resolution is not None, "Resolution not found in EyefulTower camera.xml" - original_width = int(resolution.get("width")) # type: ignore - original_height = int(resolution.get("height")) # type: ignore - - if original_width > original_height: - target_width, target_height = max(target_width, target_height), min(target_width, target_height) - else: - target_height, target_width = max(target_width, target_height), min(target_width, target_height) - - resolution.set("width", str(target_width)) - resolution.set("height", str(target_height)) - - calib = sensor.find("calibration") - assert calib is not None, "Calibration not found in EyefulTower sensor" - - calib_resolution = calib.find("resolution") - assert calib_resolution is not None - calib_resolution.set("width", str(target_width)) - calib_resolution.set("height", str(target_height)) - - # Compute each scale individually and average for better rounding - x_scale = target_width / original_width - y_scale = target_height / original_height - scale = (x_scale + y_scale) / 2.0 - - f = calib.find("f") - assert f is not None and f.text is not None, "f not found in calib" - f.text = str(float(f.text) * scale) - - cx = calib.find("cx") - assert cx is not None and cx.text is not None, "cx not found in calib" - cx.text = str(float(cx.text) * x_scale) - - cy = calib.find("cy") - assert cy is not None and cy.text is not None, "cy not found in calib" - cy.text = str(float(cy.text) * y_scale) - - # TODO: Maybe update pixel_width / pixel_height / focal_length / layer_index? 
-
-        return transformed
-
-    def convert_cameras_to_nerfstudio_transforms(
-        self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
-    ):
-        output = {}
-
-        distortion_models = [c["distortionModel"] for c in cameras["KRT"]]
-        distortion_model = list(set(distortion_models))
-        assert len(distortion_model) == 1
-        distortion_model = distortion_model[0]
-        if distortion_model == "RadialAndTangential":
-            output["camera_model"] = "OPENCV"
-        elif distortion_model == "Fisheye":
-            output["camera_model"] = "OPENCV_FISHEYE"
-        else:
-            raise NotImplementedError(f"Camera model {distortion_model} not implemented")
-
-        split_sets = {k: set(v) for k, v in splits.items()}
-
-        frames = []
-        split_filenames = collections.defaultdict(list)
-        for camera in cameras["KRT"]:
-            frame = {}
-            # TODO EXR
-            frame["file_path"] = camera["cameraId"] + f".{extension}"
-            for split in split_sets:
-                if camera["cameraId"] in split_sets[split]:
-                    split_filenames[split].append(frame["file_path"])
-
-            original_width = camera["width"]
-            original_height = camera["height"]
-            if original_width > original_height:
-                target_width, target_height = max(target_width, target_height), min(target_width, target_height)
-            else:
-                target_height, target_width = max(target_width, target_height), min(target_width, target_height)
-            x_scale = target_width / original_width
-            y_scale = target_height / original_height
-
-            frame["w"] = target_width
-            frame["h"] = target_height
-            K = np.array(camera["K"]).T  # Data stored as column-major
-            frame["fl_x"] = K[0][0] * x_scale
-            frame["fl_y"] = K[1][1] * y_scale
-            frame["cx"] = K[0][2] * x_scale
-            frame["cy"] = K[1][2] * y_scale
-
-            if distortion_model == "RadialAndTangential":
-                # pinhole: [k1, k2, p1, p2, k3]
-                frame["k1"] = camera["distortion"][0]
-                frame["k2"] = camera["distortion"][1]
-                frame["k3"] = camera["distortion"][4]
-                frame["k4"] = 0.0
-                frame["p1"] = camera["distortion"][2]
-                frame["p2"] = camera["distortion"][3]
-            elif distortion_model == "Fisheye":
-                # fisheye: [k1, k2, k3, _, _, _, p1, p2]
-                frame["k1"] = camera["distortion"][0]
-                frame["k2"] = camera["distortion"][1]
-                frame["k3"] = camera["distortion"][2]
-                frame["p1"] = camera["distortion"][6]
-                frame["p2"] = camera["distortion"][7]
-            else:
-                raise NotImplementedError("This shouldn't happen")
-
-            T = np.array(camera["T"]).T  # Data stored as column-major
-            T = np.linalg.inv(T)
-            T = T[[2, 0, 1, 3], :]
-            T[:, 1:3] *= -1
-            frame["transform_matrix"] = T.tolist()
-
-            frames.append(frame)
-
-        frames = sorted(frames, key=lambda f: f["file_path"])
-
-        output["frames"] = frames
-        output["train_filenames"] = split_filenames["train"]
-        output["val_filenames"] = split_filenames["test"]
-        return output
-
-    def subsample_nerfstudio_transforms(self, transforms: dict, n: int):
-        target = min(len(transforms["frames"]), n)
-        indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int)
-
-        frames = []
-        for i in indices:
-            frames.append(transforms["frames"][i])
-
-        output = copy.deepcopy(transforms)
-        output["frames"] = frames
-
-        # Remove the unused files from the splits
-        filenames = {f["file_path"] for f in frames}
-        for key in ["train_filenames", "val_filenames"]:
-            output[key] = sorted(list(set(transforms[key]) & filenames))
-
-        return output
-
-    def download(self, save_dir: Path):
-        if len(self.capture_name) == 0:
-            self.capture_name = ("riverview",)
-            print(
-                f"No capture specified, using {self.capture_name} by default.",
-                "Add `--help` to this command to see all available captures.",
-            )
-
-        if len(self.resolution_name) == 0:
-            self.resolution_name = ("jpeg_2k",)
-            print(
-                f"No resolution specified, using {self.resolution_name} by default.",
-                "Add `--help` to this command to see all available resolutions.",
-            )
-
-        captures = set()
-        for capture in self.capture_name:
-            if capture == "all":
-                captures.update([c for c in eyefultower_downloads if c != "all"])
-            else:
-                captures.add(capture)
-        captures = sorted(captures)
-        if len(captures) == 0:
-            print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.")
-
-        resolutions = set()
-        for resolution in self.resolution_name:
-            if resolution == "all":
-                resolutions.update([r for r in eyefultower_resolutions.keys() if r != "all"])
-            else:
-                resolutions.add(resolution)
-        resolutions = sorted(resolutions)
-        if len(resolutions) == 0:
-            print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.")
-
-        driver = awscli.clidriver.create_clidriver()
-
-        for i, capture in enumerate(captures):
-            base_url = f"s3://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15/EyefulTower/{capture}/"
-            output_path = save_dir / "eyefultower" / capture
-            includes = []
-            for resolution in resolutions:
-                includes.extend(["--include", f"{eyefultower_resolutions[resolution].folder_name}/*"])
-            command = (
-                ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"]
-                + includes
-                + [base_url, str(output_path)]
-            )
-            print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'")
-            print(
-                f"  Downloading resolutions {resolutions}",
-                f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...",
-                end=" ",
-                flush=True,
-            )
-            driver.main(command)
-            print("done!")
-
-            # After downloading, we'll insert an appropriate cameras.xml file into each directory
-            # It's quick enough that we can just redo it every time this is called, regardless
-            # of whether new data is downloaded.
-            xml_input_path = output_path / "cameras.xml"
-            if not xml_input_path.exists:
-                print("  WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.")
-            else:
-                tree = ET.parse(output_path / "cameras.xml")
-
-                for resolution in resolutions:
-                    metadata = eyefultower_resolutions[resolution]
-                    xml_output_path = output_path / metadata.folder_name / "cameras.xml"
-                    print(
-                        f"  Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ",
-                        end=" ",
-                        flush=True,
-                    )
-                    scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height)
-                    scaled_tree.write(xml_output_path)
-                    print("done!")
-
-            json_input_path = output_path / "cameras.json"
-            splits_input_path = output_path / "splits.json"
-            if not json_input_path.exists:
-                print("  WARNING: cameras.json not found. transforms.json will not be generated.")
-            elif not splits_input_path.exists:
-                print("  WARNING: splits.json not found. transforms.json will not be generated.")
-            else:
-                with open(json_input_path, "r") as f:
-                    cameras = json.load(f)
-
-                with open(splits_input_path, "r") as f:
-                    splits = json.load(f)
-
-                for resolution in resolutions:
-                    metadata = eyefultower_resolutions[resolution]
-                    json_output_path = output_path / metadata.folder_name / "transforms.json"
-                    print(
-                        f"  Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... ",
-                        end=" ",
-                        flush=True,
-                    )
-                    transforms = self.convert_cameras_to_nerfstudio_transforms(
-                        cameras, splits, metadata.width, metadata.height, metadata.extension
-                    )
-
-                    with open(json_output_path, "w", encoding="utf8") as f:
-                        json.dump(transforms, f, indent=4)
-
-                    for count, name in [
-                        (300, "transforms_300.json"),
-                        (int(len(cameras["KRT"]) // 2), "transforms_half.json"),
-                    ]:
-                        subsampled = self.subsample_nerfstudio_transforms(transforms, count)
-                        with open(json_output_path.with_name(name), "w", encoding="utf8") as f:
-                            json.dump(subsampled, f, indent=4)
-
-                    print("done!")
-
-
 Commands = Union[
     Annotated[BlenderDownload, tyro.conf.subcommand(name="blender")],
     Annotated[Sitcoms3DDownload, tyro.conf.subcommand(name="sitcoms3d")],
diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
new file mode 100644
index 0000000000..46353b4c9a
--- /dev/null
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -0,0 +1,345 @@
+# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import collections
+import copy
+import json
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING, Tuple
+
+import awscli.clidriver
+import numpy as np
+import tyro
+
+from nerfstudio.scripts.downloads.utils import DatasetDownload
+
+eyefultower_downloads = [
+    "all",
+    "apartment",
+    "kitchen",
+    "office1a",
+    "office1b",
+    "office2",
+    "office_view1",
+    "office_view2",
+    "riverview",
+    "seating_area",
+    "table",
+    "workshop",
+]
+
+
+@dataclass
+class EyefulTowerResolutionMetadata:
+    folder_name: str
+    width: int
+    height: int
+    extension: str
+
+
+eyefultower_resolutions = {
+    "all": None,
+    "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"),
+    "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"),
+    "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"),
+    "exr_2k": EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"),
+}
+
+if TYPE_CHECKING:
+    EyefulTowerCaptureName = str
+    EyefulTowerResolution = str
+else:
+    EyefulTowerCaptureName = tyro.extras.literal_type_from_choices(eyefultower_downloads)
+    EyefulTowerResolution = tyro.extras.literal_type_from_choices(eyefultower_resolutions.keys())
+
+
+@dataclass
+class EyefulTowerDownload(DatasetDownload):
+    """Download the EyefulTower dataset.
+
+    Use the --help flag with the `eyefultower` subcommand to see all available datasets.
+    Find more information about the dataset at https://github.com/facebookresearch/EyefulTower.
+    """
+
+    capture_name: Tuple[EyefulTowerCaptureName, ...] = ()
+    resolution_name: Tuple[EyefulTowerResolution, ...] = ()
+
+    @staticmethod
+    def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int):
+        transformed = copy.deepcopy(xml_tree)
+
+        root = transformed.getroot()
+        assert len(root) == 1
+        chunk = root[0]
+        sensors = chunk.find("sensors")
+        assert sensors is not None
+
+        for sensor in sensors:
+            resolution = sensor.find("resolution")
+            assert resolution is not None, "Resolution not found in EyefulTower camera.xml"
+            original_width = int(resolution.get("width"))  # type: ignore
+            original_height = int(resolution.get("height"))  # type: ignore
+
+            if original_width > original_height:
+                target_width, target_height = max(target_width, target_height), min(target_width, target_height)
+            else:
+                target_height, target_width = max(target_width, target_height), min(target_width, target_height)
+
+            resolution.set("width", str(target_width))
+            resolution.set("height", str(target_height))
+
+            calib = sensor.find("calibration")
+            assert calib is not None, "Calibration not found in EyefulTower sensor"
+
+            calib_resolution = calib.find("resolution")
+            assert calib_resolution is not None
+            calib_resolution.set("width", str(target_width))
+            calib_resolution.set("height", str(target_height))
+
+            # Compute each scale individually and average for better rounding
+            x_scale = target_width / original_width
+            y_scale = target_height / original_height
+            scale = (x_scale + y_scale) / 2.0
+
+            f = calib.find("f")
+            assert f is not None and f.text is not None, "f not found in calib"
+            f.text = str(float(f.text) * scale)
+
+            cx = calib.find("cx")
+            assert cx is not None and cx.text is not None, "cx not found in calib"
+            cx.text = str(float(cx.text) * x_scale)
+
+            cy = calib.find("cy")
+            assert cy is not None and cy.text is not None, "cy not found in calib"
+            cy.text = str(float(cy.text) * y_scale)
+
+            # TODO: Maybe update pixel_width / pixel_height / focal_length / layer_index?
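+            # Worked example (values from the resolution table in this file): resizing
+            # the 8k JPEGs (5784x8660) down to the 2k target (1368x2048) gives
+            # x_scale = 1368/5784 and y_scale = 2048/8660 (both roughly 0.2365);
+            # f is scaled by their average, while cx and cy use their own per-axis
+            # scale so the principal point stays aligned after the resize.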
+
+        return transformed
+
+    def convert_cameras_to_nerfstudio_transforms(
+        self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
+    ):
+        output = {}
+
+        distortion_models = [c["distortionModel"] for c in cameras["KRT"]]
+        distortion_model = list(set(distortion_models))
+        assert len(distortion_model) == 1
+        distortion_model = distortion_model[0]
+        if distortion_model == "RadialAndTangential":
+            output["camera_model"] = "OPENCV"
+        elif distortion_model == "Fisheye":
+            output["camera_model"] = "OPENCV_FISHEYE"
+        else:
+            raise NotImplementedError(f"Camera model {distortion_model} not implemented")
+
+        split_sets = {k: set(v) for k, v in splits.items()}
+
+        frames = []
+        split_filenames = collections.defaultdict(list)
+        for camera in cameras["KRT"]:
+            frame = {}
+            # TODO EXR
+            frame["file_path"] = camera["cameraId"] + f".{extension}"
+            for split in split_sets:
+                if camera["cameraId"] in split_sets[split]:
+                    split_filenames[split].append(frame["file_path"])
+
+            original_width = camera["width"]
+            original_height = camera["height"]
+            if original_width > original_height:
+                target_width, target_height = max(target_width, target_height), min(target_width, target_height)
+            else:
+                target_height, target_width = max(target_width, target_height), min(target_width, target_height)
+            x_scale = target_width / original_width
+            y_scale = target_height / original_height
+
+            frame["w"] = target_width
+            frame["h"] = target_height
+            K = np.array(camera["K"]).T  # Data stored as column-major
+            frame["fl_x"] = K[0][0] * x_scale
+            frame["fl_y"] = K[1][1] * y_scale
+            frame["cx"] = K[0][2] * x_scale
+            frame["cy"] = K[1][2] * y_scale
+
+            if distortion_model == "RadialAndTangential":
+                # pinhole: [k1, k2, p1, p2, k3]
+                frame["k1"] = camera["distortion"][0]
+                frame["k2"] = camera["distortion"][1]
+                frame["k3"] = camera["distortion"][4]
+                frame["k4"] = 0.0
+                frame["p1"] = camera["distortion"][2]
+                frame["p2"] = camera["distortion"][3]
+            elif distortion_model == "Fisheye":
+                # fisheye: [k1, k2, k3, _, _, _, p1, p2]
+                frame["k1"] = camera["distortion"][0]
+                frame["k2"] = camera["distortion"][1]
+                frame["k3"] = camera["distortion"][2]
+                frame["p1"] = camera["distortion"][6]
+                frame["p2"] = camera["distortion"][7]
+            else:
+                raise NotImplementedError("This shouldn't happen")
+
+            T = np.array(camera["T"]).T  # Data stored as column-major
+            T = np.linalg.inv(T)
+            T = T[[2, 0, 1, 3], :]
+            T[:, 1:3] *= -1
+            frame["transform_matrix"] = T.tolist()
+
+            frames.append(frame)
+
+        frames = sorted(frames, key=lambda f: f["file_path"])
+
+        output["frames"] = frames
+        output["train_filenames"] = split_filenames["train"]
+        output["val_filenames"] = split_filenames["test"]
+        return output
+
+    def subsample_nerfstudio_transforms(self, transforms: dict, n: int):
+        target = min(len(transforms["frames"]), n)
+        indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int)
+
+        frames = []
+        for i in indices:
+            frames.append(transforms["frames"][i])
+
+        output = copy.deepcopy(transforms)
+        output["frames"] = frames
+
+        # Remove the unused files from the splits
+        filenames = {f["file_path"] for f in frames}
+        for key in ["train_filenames", "val_filenames"]:
+            output[key] = sorted(list(set(transforms[key]) & filenames))
+
+        return output
+
+    def download(self, save_dir: Path):
+        if len(self.capture_name) == 0:
+            self.capture_name = ("riverview",)
+            print(
+                f"No capture specified, using {self.capture_name} by default.",
+                "Add `--help` to this command to see all available captures.",
+            )
+
+        if len(self.resolution_name) == 0:
+            self.resolution_name = ("jpeg_2k",)
+            print(
+                f"No resolution specified, using {self.resolution_name} by default.",
+                "Add `--help` to this command to see all available resolutions.",
+            )
+
+        captures = set()
+        for capture in self.capture_name:
+            if capture == "all":
+                captures.update([c for c in eyefultower_downloads if c != "all"])
+            else:
+                captures.add(capture)
+        captures = sorted(captures)
+        if len(captures) == 0:
+            print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.")
+
+        resolutions = set()
+        for resolution in self.resolution_name:
+            if resolution == "all":
+                resolutions.update([r for r in eyefultower_resolutions.keys() if r != "all"])
+            else:
+                resolutions.add(resolution)
+        resolutions = sorted(resolutions)
+        if len(resolutions) == 0:
+            print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.")
+
+        driver = awscli.clidriver.create_clidriver()
+
+        for i, capture in enumerate(captures):
+            base_url = f"s3://fb-baas-f32eacb9-8abb-11eb-b2b8-4857dd089e15/EyefulTower/{capture}/"
+            output_path = save_dir / "eyefultower" / capture
+            includes = []
+            for resolution in resolutions:
+                includes.extend(["--include", f"{eyefultower_resolutions[resolution].folder_name}/*"])
+            command = (
+                ["s3", "sync", "--no-sign-request", "--only-show-errors", "--exclude", "images*/*"]
+                + includes
+                + [base_url, str(output_path)]
+            )
+            print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'")
+            print(
+                f"  Downloading resolutions {resolutions}",
+                f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...",
+                end=" ",
+                flush=True,
+            )
+            driver.main(command)
+            print("done!")
+
+            # After downloading, we'll insert an appropriate cameras.xml file into each directory
+            # It's quick enough that we can just redo it every time this is called, regardless
+            # of whether new data is downloaded.
+            xml_input_path = output_path / "cameras.xml"
+            if not xml_input_path.exists():
+                print("  WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.")
+            else:
+                tree = ET.parse(output_path / "cameras.xml")
+
+                for resolution in resolutions:
+                    metadata = eyefultower_resolutions[resolution]
+                    xml_output_path = output_path / metadata.folder_name / "cameras.xml"
+                    print(
+                        f"  Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ",
+                        end=" ",
+                        flush=True,
+                    )
+                    scaled_tree = self.scale_metashape_transform(tree, metadata.width, metadata.height)
+                    scaled_tree.write(xml_output_path)
+                    print("done!")
+
+            json_input_path = output_path / "cameras.json"
+            splits_input_path = output_path / "splits.json"
+            if not json_input_path.exists():
+                print("  WARNING: cameras.json not found. transforms.json will not be generated.")
+            elif not splits_input_path.exists():
+                print("  WARNING: splits.json not found. transforms.json will not be generated.")
+            else:
+                with open(json_input_path, "r") as f:
+                    cameras = json.load(f)
+
+                with open(splits_input_path, "r") as f:
+                    splits = json.load(f)
+
+                for resolution in resolutions:
+                    metadata = eyefultower_resolutions[resolution]
+                    json_output_path = output_path / metadata.folder_name / "transforms.json"
+                    print(
+                        f"  Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... ",
", + end=" ", + flush=True, + ) + transforms = self.convert_cameras_to_nerfstudio_transforms( + cameras, splits, metadata.width, metadata.height, metadata.extension + ) + + with open(json_output_path, "w", encoding="utf8") as f: + json.dump(transforms, f, indent=4) + + for count, name in [ + (300, "transforms_300.json"), + (int(len(cameras["KRT"]) // 2), "transforms_half.json"), + ]: + subsampled = self.subsample_nerfstudio_transforms(transforms, count) + with open(json_output_path.with_name(name), "w", encoding="utf8") as f: + json.dump(subsampled, f, indent=4) + + print("done!") diff --git a/nerfstudio/scripts/downloads/utils.py b/nerfstudio/scripts/downloads/utils.py new file mode 100644 index 0000000000..72054edb03 --- /dev/null +++ b/nerfstudio/scripts/downloads/utils.py @@ -0,0 +1,32 @@ +# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from pathlib import Path + +from nerfstudio.configs.base_config import PrintableConfig + + +@dataclass +class DatasetDownload(PrintableConfig): + """Download a dataset""" + + capture_name = None + + save_dir: Path = Path("data/") + """The directory to save the dataset to""" + + def download(self, save_dir: Path) -> None: + """Download the dataset""" + raise NotImplementedError From c481c29f256bb3e981bb4333516a2cb9e300cd55 Mon Sep 17 00:00:00 2001 From: xucr Date: Sat, 13 Jan 2024 20:17:09 -0800 Subject: [PATCH 19/38] Fix typo --- nerfstudio/data/datamanagers/full_images_datamanager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 99bba3f3da..61296e010a 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -58,12 +58,11 @@ class FullImageDatamanagerConfig(DataManagerConfig): """When not evaluating on all images, number of iterations before picking new images. If -1, never pick new images.""" eval_image_indices: Optional[Tuple[int, ...]] = (0,) - """Specifies the image indices to use during eval; if None, uses all."" + """Specifies the image indices to use during eval; if None, uses all.""" cache_images: Literal["cpu", "gpu"] = "cpu" """Whether to cache images in memory. If "cpu", caches on cpu. If "gpu", caches on device.""" - class FullImageDatamanager(DataManager, Generic[TDataset]): """ A datamanager that outputs full images and cameras instead of raybundles. 
This makes the From ac5200af82a4709298d8f5713861100b52135f76 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Tue, 16 Jan 2024 17:23:43 -0500 Subject: [PATCH 20/38] Add some fisheye support for eyeful data --- nerfstudio/configs/method_configs.py | 52 ++++++++++++------- .../data/datamanagers/base_datamanager.py | 35 +++++-------- .../data/datamanagers/parallel_datamanager.py | 22 +++++--- .../data/dataparsers/nerfstudio_dataparser.py | 21 +++++--- nerfstudio/scripts/downloads/eyeful_tower.py | 14 ++++- nerfstudio/utils/tensor_dataclass.py | 6 ++- 6 files changed, 89 insertions(+), 61 deletions(-) diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index 96f4b175f9..4d3ea835bd 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -26,30 +26,42 @@ from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig from nerfstudio.configs.base_config import ViewerConfig from nerfstudio.configs.external_methods import get_external_methods -from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig -from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig -from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig -from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig -from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig -from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig +from nerfstudio.data.datamanagers.base_datamanager import ( + VanillaDataManager, VanillaDataManagerConfig) +from nerfstudio.data.datamanagers.full_images_datamanager import \ + FullImageDatamanagerConfig +from nerfstudio.data.datamanagers.parallel_datamanager import \ + ParallelDataManagerConfig +from nerfstudio.data.datamanagers.random_cameras_datamanager import \ + RandomCamerasDataManagerConfig +from nerfstudio.data.dataparsers.blender_dataparser import \ + BlenderDataParserConfig +from nerfstudio.data.dataparsers.colmap_dataparser import \ + ColmapDataParserConfig from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig -from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig -from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig -from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig -from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig -from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig +from nerfstudio.data.dataparsers.instant_ngp_dataparser import \ + InstantNGPDataParserConfig +from nerfstudio.data.dataparsers.nerfstudio_dataparser import \ + NerfstudioDataParserConfig +from nerfstudio.data.dataparsers.phototourism_dataparser import \ + PhototourismDataParserConfig +from nerfstudio.data.dataparsers.sdfstudio_dataparser import \ + SDFStudioDataParserConfig +from nerfstudio.data.dataparsers.sitcoms3d_dataparser import \ + Sitcoms3DDataParserConfig from nerfstudio.data.datasets.depth_dataset import DepthDataset from nerfstudio.data.datasets.sdf_dataset import SDFDataset from nerfstudio.data.datasets.semantic_dataset import SemanticDataset -from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig -from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig -from nerfstudio.engine.schedulers import ( - 
CosineDecaySchedulerConfig, - ExponentialDecaySchedulerConfig, - MultiStepSchedulerConfig, -) +from nerfstudio.data.pixel_samplers import (PairPixelSamplerConfig, + PixelSamplerConfig) +from nerfstudio.engine.optimizers import (AdamOptimizerConfig, + RAdamOptimizerConfig) +from nerfstudio.engine.schedulers import (CosineDecaySchedulerConfig, + ExponentialDecaySchedulerConfig, + MultiStepSchedulerConfig) from nerfstudio.engine.trainer import TrainerConfig -from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind +from nerfstudio.field_components.temporal_distortions import \ + TemporalDistortionKind from nerfstudio.fields.sdf_field import SDFFieldConfig from nerfstudio.models.depth_nerfacto import DepthNerfactoModelConfig from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig @@ -246,7 +258,7 @@ }, }, viewer=ViewerConfig(num_rays_per_chunk=1 << 15), - vis="viewer", + vis="viewer+tensorboard", ) method_configs["depth-nerfacto"] = TrainerConfig( diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 2131d5260b..66363c060e 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -23,22 +23,8 @@ from dataclasses import dataclass, field from functools import cached_property from pathlib import Path -from typing import ( - Any, - Callable, - Dict, - ForwardRef, - Generic, - List, - Literal, - Optional, - Tuple, - Type, - Union, - cast, - get_args, - get_origin, -) +from typing import (Any, Callable, Dict, ForwardRef, Generic, List, Literal, + Optional, Tuple, Type, Union, cast, get_args, get_origin) import torch from torch import nn @@ -52,12 +38,17 @@ from nerfstudio.configs.base_config import InstantiateConfig from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs -from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig +from nerfstudio.data.dataparsers.blender_dataparser import \ + BlenderDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset -from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig -from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader +from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig, + PixelSampler, PixelSamplerConfig) +from nerfstudio.data.utils.dataloaders import (CacheDataloader, + FixedIndicesEvalDataloader, + RandIndicesEvalDataloader) from nerfstudio.data.utils.nerfstudio_collate import nerfstudio_collate -from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes +from nerfstudio.engine.callbacks import (TrainingCallback, + TrainingCallbackAttributes) from nerfstudio.model_components.ray_generators import RayGenerator from nerfstudio.utils.misc import IterableWrapper, get_orig_class from nerfstudio.utils.rich_utils import CONSOLE @@ -468,8 +459,8 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.") fisheye_crop_radius = None - if dataset.cameras.metadata is not None and "fisheye_crop_radius" in dataset.cameras.metadata: - fisheye_crop_radius = dataset.cameras.metadata["fisheye_crop_radius"] + if dataset.cameras.metadata is not None: + fisheye_crop_radius = 
dataset.cameras.metadata.get("fisheye_crop_radius") return self.config.pixel_sampler.setup( is_equirectangular=is_equirectangular, diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py index 9f36807a61..eb704ae3fd 100644 --- a/nerfstudio/data/datamanagers/parallel_datamanager.py +++ b/nerfstudio/data/datamanagers/parallel_datamanager.py @@ -32,14 +32,13 @@ from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.cameras.rays import RayBundle from nerfstudio.data.datamanagers.base_datamanager import ( - DataManager, - TDataset, - VanillaDataManagerConfig, - variable_res_collate, -) + DataManager, TDataset, VanillaDataManagerConfig, variable_res_collate) from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs -from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig -from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader +from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig, + PixelSampler, PixelSamplerConfig) +from nerfstudio.data.utils.dataloaders import (CacheDataloader, + FixedIndicesEvalDataloader, + RandIndicesEvalDataloader) from nerfstudio.model_components.ray_generators import RayGenerator from nerfstudio.utils.rich_utils import CONSOLE @@ -198,8 +197,15 @@ def _get_pixel_sampler(self, dataset: TDataset, num_rays_per_batch: int) -> Pixe is_equirectangular = (dataset.cameras.camera_type == CameraType.EQUIRECTANGULAR.value).all() if is_equirectangular.any(): CONSOLE.print("[bold yellow]Warning: Some cameras are equirectangular, but using default pixel sampler.") + + fisheye_crop_radius = None + if dataset.cameras.metadata is not None: + fisheye_crop_radius = dataset.cameras.metadata.get("fisheye_crop_radius") + return self.config.pixel_sampler.setup( - is_equirectangular=is_equirectangular, num_rays_per_batch=num_rays_per_batch + is_equirectangular=is_equirectangular, + num_rays_per_batch=num_rays_per_batch, + fisheye_crop_radius=fisheye_crop_radius, ) def setup_train(self): diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index aaa88c7691..4cca0e9304 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -25,15 +25,15 @@ from PIL import Image from nerfstudio.cameras import camera_utils -from nerfstudio.cameras.cameras import CAMERA_MODEL_TO_TYPE, Cameras, CameraType -from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs +from nerfstudio.cameras.cameras import (CAMERA_MODEL_TO_TYPE, Cameras, + CameraType) +from nerfstudio.data.dataparsers.base_dataparser import (DataParser, + DataParserConfig, + DataparserOutputs) from nerfstudio.data.scene_box import SceneBox from nerfstudio.data.utils.dataparsers_utils import ( - get_train_eval_split_all, - get_train_eval_split_filename, - get_train_eval_split_fraction, - get_train_eval_split_interval, -) + get_train_eval_split_all, get_train_eval_split_filename, + get_train_eval_split_fraction, get_train_eval_split_interval) from nerfstudio.utils.io import load_from_json from nerfstudio.utils.rich_utils import CONSOLE @@ -289,7 +289,12 @@ def _generate_dataparser_outputs(self, split="train"): else: distortion_params = torch.stack(distort, dim=0)[idx_tensor] - metadata = {"fisheye_crop_radius": fisheye_crop_radius} if fisheye_crop_radius 
+        # Only add fisheye crop radius parameter if the images are actually fisheye, to allow the same config to be used
+        # for both fisheye and non-fisheye datasets.
+        metadata = {}
+        if (camera_type in [CameraType.FISHEYE, CameraType.FISHEYE624]) and (fisheye_crop_radius is not None):
+            metadata['fisheye_crop_radius'] = fisheye_crop_radius
+
         cameras = Cameras(
             fx=fx,
             fy=fy,
diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
index 46353b4c9a..afc7486c0b 100644
--- a/nerfstudio/scripts/downloads/eyeful_tower.py
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -42,6 +42,15 @@
     "workshop",
 ]
 
+# Crop radii empirically chosen to try to avoid hitting the rig base or go out of bounds
+eyefultower_fisheye_radii = {
+    "office1a": 0.43,
+    "office2": 0.45,
+    "seating_area": 0.375, # could be .45 except for camera 2
+    "table": 0.45,
+    "workshop": 0.45,
+}
+
 
 @dataclass
 class EyefulTowerResolutionMetadata:
@@ -132,7 +141,7 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe
         return transformed
 
     def convert_cameras_to_nerfstudio_transforms(
-        self, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
+        self, capture_name: str, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str
     ):
         output = {}
 
@@ -144,6 +153,7 @@ def convert_cameras_to_nerfstudio_transforms(
             output["camera_model"] = "OPENCV"
         elif distortion_model == "Fisheye":
             output["camera_model"] = "OPENCV_FISHEYE"
+            output["fisheye_crop_radius"] = eyefultower_fisheye_radii[capture_name]
         else:
             raise NotImplementedError(f"Camera model {distortion_model} not implemented")
 
@@ -328,7 +338,7 @@ def download(self, save_dir: Path):
                         flush=True,
                     )
                     transforms = self.convert_cameras_to_nerfstudio_transforms(
-                        cameras, splits, metadata.width, metadata.height, metadata.extension
+                        capture, cameras, splits, metadata.width, metadata.height, metadata.extension
                     )
 
                     with open(json_output_path, "w", encoding="utf8") as f:
                         json.dump(transforms, f, indent=4)
diff --git a/nerfstudio/utils/tensor_dataclass.py b/nerfstudio/utils/tensor_dataclass.py
index a2b8d1dadb..6c77eef6fe 100644
--- a/nerfstudio/utils/tensor_dataclass.py
+++ b/nerfstudio/utils/tensor_dataclass.py
@@ -16,7 +16,8 @@
 
 import dataclasses
 from copy import deepcopy
-from typing import Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union
+from typing import (Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar,
+                    Union)
 
 import numpy as np
 import torch
@@ -141,6 +142,9 @@ def _broadcast_dict_fields(self, dict_: Dict, batch_shape) -> Dict:
                 new_dict[k] = v.broadcast_to(batch_shape)
             elif isinstance(v, Dict):
                 new_dict[k] = self._broadcast_dict_fields(v, batch_shape)
+            else:
+                # Don't broadcast the remaining fields
+                new_dict[k] = v
         return new_dict
 
     def __getitem__(self: TensorDataclassT, indices) -> TensorDataclassT:

From 6998760c75e71d7aefd6c51fc4a5b5d9161c69ba Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Tue, 16 Jan 2024 17:50:26 -0500
Subject: [PATCH 21/38] Reformatted imports to not be dumb

---
 nerfstudio/configs/method_configs.py          | 50 +++++++------------
 .../data/datamanagers/base_datamanager.py     | 31 ++++++++----
 .../data/datamanagers/parallel_datamanager.py | 13 ++---
 .../data/dataparsers/nerfstudio_dataparser.py | 16 +++---
 nerfstudio/utils/tensor_dataclass.py          |  3 +-
 5 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py
index 4d3ea835bd..f0dd59c652 100644
--- a/nerfstudio/configs/method_configs.py
+++ b/nerfstudio/configs/method_configs.py
@@ -26,42 +26,30 @@
 from nerfstudio.cameras.camera_optimizers import CameraOptimizerConfig
 from nerfstudio.configs.base_config import ViewerConfig
 from nerfstudio.configs.external_methods import get_external_methods
-from nerfstudio.data.datamanagers.base_datamanager import (
-    VanillaDataManager, VanillaDataManagerConfig)
-from nerfstudio.data.datamanagers.full_images_datamanager import \
-    FullImageDatamanagerConfig
-from nerfstudio.data.datamanagers.parallel_datamanager import \
-    ParallelDataManagerConfig
-from nerfstudio.data.datamanagers.random_cameras_datamanager import \
-    RandomCamerasDataManagerConfig
-from nerfstudio.data.dataparsers.blender_dataparser import \
-    BlenderDataParserConfig
-from nerfstudio.data.dataparsers.colmap_dataparser import \
-    ColmapDataParserConfig
+from nerfstudio.data.datamanagers.base_datamanager import VanillaDataManager, VanillaDataManagerConfig
+from nerfstudio.data.datamanagers.full_images_datamanager import FullImageDatamanagerConfig
+from nerfstudio.data.datamanagers.parallel_datamanager import ParallelDataManagerConfig
+from nerfstudio.data.datamanagers.random_cameras_datamanager import RandomCamerasDataManagerConfig
+from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
+from nerfstudio.data.dataparsers.colmap_dataparser import ColmapDataParserConfig
 from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig
-from nerfstudio.data.dataparsers.instant_ngp_dataparser import \
-    InstantNGPDataParserConfig
-from nerfstudio.data.dataparsers.nerfstudio_dataparser import \
-    NerfstudioDataParserConfig
-from nerfstudio.data.dataparsers.phototourism_dataparser import \
-    PhototourismDataParserConfig
-from nerfstudio.data.dataparsers.sdfstudio_dataparser import \
-    SDFStudioDataParserConfig
-from nerfstudio.data.dataparsers.sitcoms3d_dataparser import \
-    Sitcoms3DDataParserConfig
+from nerfstudio.data.dataparsers.instant_ngp_dataparser import InstantNGPDataParserConfig
+from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig
+from nerfstudio.data.dataparsers.phototourism_dataparser import PhototourismDataParserConfig
+from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig
+from nerfstudio.data.dataparsers.sitcoms3d_dataparser import Sitcoms3DDataParserConfig
 from nerfstudio.data.datasets.depth_dataset import DepthDataset
 from nerfstudio.data.datasets.sdf_dataset import SDFDataset
 from nerfstudio.data.datasets.semantic_dataset import SemanticDataset
-from nerfstudio.data.pixel_samplers import (PairPixelSamplerConfig,
-                                            PixelSamplerConfig)
-from nerfstudio.engine.optimizers import (AdamOptimizerConfig,
-                                          RAdamOptimizerConfig)
-from nerfstudio.engine.schedulers import (CosineDecaySchedulerConfig,
-                                          ExponentialDecaySchedulerConfig,
-                                          MultiStepSchedulerConfig)
+from nerfstudio.data.pixel_samplers import PairPixelSamplerConfig
+from nerfstudio.engine.optimizers import AdamOptimizerConfig, RAdamOptimizerConfig
+from nerfstudio.engine.schedulers import (
+    CosineDecaySchedulerConfig,
+    ExponentialDecaySchedulerConfig,
+    MultiStepSchedulerConfig,
+)
 from nerfstudio.engine.trainer import TrainerConfig
-from nerfstudio.field_components.temporal_distortions import \
-    TemporalDistortionKind
+from nerfstudio.field_components.temporal_distortions import TemporalDistortionKind
 from nerfstudio.fields.sdf_field import SDFFieldConfig
 from nerfstudio.models.depth_nerfacto import DepthNerfactoModelConfig
 from nerfstudio.models.gaussian_splatting import GaussianSplattingModelConfig
diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index 66363c060e..d97ddd2a30 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -23,8 +23,22 @@
 from dataclasses import dataclass, field
 from functools import cached_property
 from pathlib import Path
-from typing import (Any, Callable, Dict, ForwardRef, Generic, List, Literal,
-                    Optional, Tuple, Type, Union, cast, get_args, get_origin)
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    ForwardRef,
+    Generic,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Type,
+    Union,
+    cast,
+    get_args,
+    get_origin,
+)
 
 import torch
 from torch import nn
@@ -52,12 +38,17 @@
 from nerfstudio.configs.base_config import InstantiateConfig
 from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion
 from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
-from nerfstudio.data.dataparsers.blender_dataparser import \
-    BlenderDataParserConfig
+from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig
 from nerfstudio.data.datasets.base_dataset import InputDataset
-from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig,
-                                            PixelSampler, PixelSamplerConfig)
-from nerfstudio.data.utils.dataloaders import (CacheDataloader,
-                                               FixedIndicesEvalDataloader,
-                                               RandIndicesEvalDataloader)
+from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig
+from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader
 from nerfstudio.data.utils.nerfstudio_collate import nerfstudio_collate
-from nerfstudio.engine.callbacks import (TrainingCallback,
-                                         TrainingCallbackAttributes)
+from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
 from nerfstudio.model_components.ray_generators import RayGenerator
 from nerfstudio.utils.misc import IterableWrapper, get_orig_class
 from nerfstudio.utils.rich_utils import CONSOLE
diff --git a/nerfstudio/data/datamanagers/parallel_datamanager.py b/nerfstudio/data/datamanagers/parallel_datamanager.py
index eb704ae3fd..bd66d01db8 100644
--- a/nerfstudio/data/datamanagers/parallel_datamanager.py
+++ b/nerfstudio/data/datamanagers/parallel_datamanager.py
@@ -32,13 +32,14 @@
 from nerfstudio.cameras.cameras import Cameras, CameraType
 from nerfstudio.cameras.rays import RayBundle
 from nerfstudio.data.datamanagers.base_datamanager import (
-    DataManager, TDataset, VanillaDataManagerConfig, variable_res_collate)
+    DataManager,
+    TDataset,
+    VanillaDataManagerConfig,
+    variable_res_collate,
+)
 from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
-from nerfstudio.data.pixel_samplers import (PatchPixelSamplerConfig,
-                                            PixelSampler, PixelSamplerConfig)
-from nerfstudio.data.utils.dataloaders import (CacheDataloader,
-                                               FixedIndicesEvalDataloader,
-                                               RandIndicesEvalDataloader)
+from nerfstudio.data.pixel_samplers import PatchPixelSamplerConfig, PixelSampler, PixelSamplerConfig
+from nerfstudio.data.utils.dataloaders import CacheDataloader, FixedIndicesEvalDataloader, RandIndicesEvalDataloader
 from nerfstudio.model_components.ray_generators import RayGenerator
 from nerfstudio.utils.rich_utils import CONSOLE
 
diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
index 4cca0e9304..e545bae781 100644
--- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
+++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py
@@ -25,15 +25,15 @@
 from PIL import Image
 
 from nerfstudio.cameras import camera_utils
-from nerfstudio.cameras.cameras import (CAMERA_MODEL_TO_TYPE, Cameras,
-                                        CameraType)
-from nerfstudio.data.dataparsers.base_dataparser import (DataParser,
-                                                         DataParserConfig,
-                                                         DataparserOutputs)
+from nerfstudio.cameras.cameras import CAMERA_MODEL_TO_TYPE, Cameras, CameraType
+from nerfstudio.data.dataparsers.base_dataparser import DataParser, DataParserConfig, DataparserOutputs
 from nerfstudio.data.scene_box import SceneBox
 from nerfstudio.data.utils.dataparsers_utils import (
-    get_train_eval_split_all, get_train_eval_split_filename,
-    get_train_eval_split_fraction, get_train_eval_split_interval)
+    get_train_eval_split_all,
+    get_train_eval_split_filename,
+    get_train_eval_split_fraction,
+    get_train_eval_split_interval,
+)
 from nerfstudio.utils.io import load_from_json
 from nerfstudio.utils.rich_utils import CONSOLE
 
@@ -293,7 +293,7 @@ def _generate_dataparser_outputs(self, split="train"):
         # for both fisheye and non-fisheye datasets.
         metadata = {}
         if (camera_type in [CameraType.FISHEYE, CameraType.FISHEYE624]) and (fisheye_crop_radius is not None):
-            metadata['fisheye_crop_radius'] = fisheye_crop_radius
+            metadata["fisheye_crop_radius"] = fisheye_crop_radius
 
         cameras = Cameras(
             fx=fx,
diff --git a/nerfstudio/utils/tensor_dataclass.py b/nerfstudio/utils/tensor_dataclass.py
index 6c77eef6fe..293d978d7e 100644
--- a/nerfstudio/utils/tensor_dataclass.py
+++ b/nerfstudio/utils/tensor_dataclass.py
@@ -16,8 +16,7 @@
 
 import dataclasses
 from copy import deepcopy
-from typing import (Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar,
-                    Union)
+from typing import Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union
 
 import numpy as np
 import torch

From 82d4f5ec67b7e7f0184daa39ba97f3b09319263f Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Tue, 16 Jan 2024 18:18:51 -0500
Subject: [PATCH 22/38] Apparently this file was missed when formatting
 originally

---
 nerfstudio/scripts/downloads/eyeful_tower.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
index afc7486c0b..2d1cf47dc3 100644
--- a/nerfstudio/scripts/downloads/eyeful_tower.py
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -46,7 +46,7 @@
 eyefultower_fisheye_radii = {
     "office1a": 0.43,
     "office2": 0.45,
-    "seating_area": 0.375, # could be .45 except for camera 2
+    "seating_area": 0.375,  # could be .45 except for camera 2
     "table": 0.45,
     "workshop": 0.45,
 }

From bf6d7b74cb14e8f979894a13ae70b89669b2b3b1 Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Thu, 18 Jan 2024 13:51:18 -0500
Subject: [PATCH 23/38] Added 1k resolution scenes

---
 nerfstudio/scripts/downloads/eyeful_tower.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py
index 2d1cf47dc3..df49498cd9 100644
--- a/nerfstudio/scripts/downloads/eyeful_tower.py
+++ b/nerfstudio/scripts/downloads/eyeful_tower.py
@@ -62,6 +62,7 @@ class EyefulTowerResolutionMetadata:
 
 eyefultower_resolutions = {
     "all": None,
+    "jpeg_1k": EyefulTowerResolutionMetadata("images-jpeg-1k", 684, 1024, "jpg"),
     "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"),
     "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"),
"jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), From 71266942dd762789db93801857016fc0a2397a66 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 13:55:17 -0500 Subject: [PATCH 24/38] revert method_configs.py to original values --- nerfstudio/configs/method_configs.py | 43 ++-------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/nerfstudio/configs/method_configs.py b/nerfstudio/configs/method_configs.py index f0dd59c652..55ba44eb41 100644 --- a/nerfstudio/configs/method_configs.py +++ b/nerfstudio/configs/method_configs.py @@ -69,7 +69,6 @@ method_configs: Dict[str, TrainerConfig] = {} descriptions = { "nerfacto": "Recommended real-time model tuned for real captures. This model will be continually updated.", - "nerfacto-eyeful-tower": "Variant of nerfacto with settings tuned for EyefulTower dataset scenes.", "depth-nerfacto": "Nerfacto with depth supervision.", "instant-ngp": "Implementation of Instant-NGP. Recommended real-time model for unbounded scenes.", "instant-ngp-bounded": "Implementation of Instant-NGP. Recommended for bounded real and synthetic scenes", @@ -210,45 +209,6 @@ vis="viewer", ) -method_configs["nerfacto-eyeful-tower"] = TrainerConfig( - method_name="nerfacto", - steps_per_eval_batch=500, - steps_per_save=2000, - max_num_iterations=100_000, - steps_per_eval_all_images=100_000, - mixed_precision=True, - pipeline=VanillaPipelineConfig( - datamanager=ParallelDataManagerConfig( - dataparser=NerfstudioDataParserConfig(), - train_num_rays_per_batch=12_800, - eval_num_rays_per_batch=4096, - ), - model=NerfactoModelConfig( - eval_num_rays_per_chunk=1 << 15, - camera_optimizer=CameraOptimizerConfig(mode="off"), - max_res=19_912, - log2_hashmap_size=22, - far_plane=100.0, - ), - ), - optimizers={ - "proposal_networks": { - "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), - "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), - }, - "fields": { - "optimizer": AdamOptimizerConfig(lr=1e-2, eps=1e-15), - "scheduler": ExponentialDecaySchedulerConfig(lr_final=0.0001, max_steps=200000), - }, - "camera_opt": { - "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15), - "scheduler": ExponentialDecaySchedulerConfig(lr_final=1e-4, max_steps=5000), - }, - }, - viewer=ViewerConfig(num_rays_per_chunk=1 << 15), - vis="viewer+tensorboard", -) - method_configs["depth-nerfacto"] = TrainerConfig( method_name="depth-nerfacto", steps_per_eval_batch=500, @@ -337,7 +297,8 @@ viewer=ViewerConfig(num_rays_per_chunk=1 << 12), vis="viewer", ) - +# +# method_configs["mipnerf"] = TrainerConfig( method_name="mipnerf", pipeline=VanillaPipelineConfig( From e24ffe0718f248f9c5024b28c94d4093818e7ee2 Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 15:59:00 -0500 Subject: [PATCH 25/38] Also add 1k exrs --- nerfstudio/scripts/downloads/eyeful_tower.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py index df49498cd9..3bab323cc6 100644 --- a/nerfstudio/scripts/downloads/eyeful_tower.py +++ b/nerfstudio/scripts/downloads/eyeful_tower.py @@ -66,6 +66,7 @@ class EyefulTowerResolutionMetadata: "jpeg_2k": EyefulTowerResolutionMetadata("images-jpeg-2k", 1368, 2048, "jpg"), "jpeg_4k": EyefulTowerResolutionMetadata("images-jpeg-4k", 2736, 4096, "jpg"), "jpeg_8k": EyefulTowerResolutionMetadata("images-jpeg", 5784, 8660, "jpg"), + "exr_1k": EyefulTowerResolutionMetadata("images-1k", 684, 1024, "exr"), "exr_2k": 
EyefulTowerResolutionMetadata("images-2k", 1368, 2048, "exr"), } From a022a578db634dd675938c812ca701cd47260c3c Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 18:29:17 -0800 Subject: [PATCH 26/38] Add option to modify bg color in gaussian splatting --- .../data/dataparsers/blender_dataparser.py | 2 -- nerfstudio/model_components/renderers.py | 6 ----- nerfstudio/models/gaussian_splatting.py | 23 ++++++++++++++----- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/nerfstudio/data/dataparsers/blender_dataparser.py b/nerfstudio/data/dataparsers/blender_dataparser.py index d8172579c7..b7c90c907e 100644 --- a/nerfstudio/data/dataparsers/blender_dataparser.py +++ b/nerfstudio/data/dataparsers/blender_dataparser.py @@ -32,7 +32,6 @@ from nerfstudio.data.scene_box import SceneBox from nerfstudio.utils.colors import get_color from nerfstudio.utils.io import load_from_json -from nerfstudio.model_components.renderers import force_background_color_override @dataclass @@ -64,7 +63,6 @@ def __init__(self, config: BlenderDataParserConfig): self.alpha_color = config.alpha_color if self.alpha_color is not None: self.alpha_color_tensor = get_color(self.alpha_color) - force_background_color_override(self.alpha_color_tensor) else: self.alpha_color_tensor = None diff --git a/nerfstudio/model_components/renderers.py b/nerfstudio/model_components/renderers.py index bbcca06624..1fde0d693c 100644 --- a/nerfstudio/model_components/renderers.py +++ b/nerfstudio/model_components/renderers.py @@ -55,12 +55,6 @@ def background_color_override_context(mode: Float[Tensor, "3"]) -> Generator[Non BACKGROUND_COLOR_OVERRIDE = old_background_color -def force_background_color_override(mode: Float[Tensor, "3"]) -> None: - """Force background color override.""" - global BACKGROUND_COLOR_OVERRIDE - BACKGROUND_COLOR_OVERRIDE = mode - - class RGBRenderer(nn.Module): """Standard volumetric rendering. 
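With force_background_color_override removed, background overrides flow only through the
background_color_override_context context manager that remains in renderers.py, which the
Gaussian splatting model below consults via renderers.BACKGROUND_COLOR_OVERRIDE at eval
time. A minimal usage sketch, assuming a trained model and camera are in scope (the
get_outputs call is illustrative, not part of this patch):

    import torch
    from nerfstudio.model_components import renderers

    override = torch.ones(3)  # render on a white background
    with renderers.background_color_override_context(override):
        # BACKGROUND_COLOR_OVERRIDE is set inside this block and restored on exit
        outputs = model.get_outputs(camera)  # hypothetical call for illustration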
diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index 70f20706d8..081880ee2c 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -22,6 +22,7 @@
 import math
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Tuple, Type, Union
+from typing_extensions import Literal
 
 import numpy as np
 import torch
@@ -46,6 +47,7 @@
 # need following import for background color override
 from nerfstudio.model_components import renderers
 from nerfstudio.models.base_model import Model, ModelConfig
+from nerfstudio.utils.colors import get_color
 from nerfstudio.utils.rich_utils import CONSOLE
 
@@ -111,10 +113,12 @@ class GaussianSplattingModelConfig(ModelConfig):
     _target: Type = field(default_factory=lambda: GaussianSplattingModel)
     warmup_length: int = 500
     """period of steps where refinement is turned off"""
-    refine_every: int = 100
+    refine_every: int = 150
     """period of steps where gaussians are culled and densified"""
     resolution_schedule: int = 250
     """training starts at 1/d resolution, every n steps this is doubled"""
+    background_color: Literal["random", "black", "white"] = "random"
+    """Whether to randomize the background color."""
     num_downscales: int = 0
     """at the beginning, resolution is 1/2^d, where d is this number"""
     cull_alpha_thresh: float = 0.1
@@ -211,7 +215,10 @@ def populate_modules(self):
         self.step = 0
 
         self.crop_box: Optional[OrientedBox] = None
-        self.back_color = torch.zeros(3)
+        if self.config.background_color == "random":
+            self.back_color = torch.rand(3)
+        else:
+            self.back_color = get_color(self.config.background_color)
 
         self.camera_optimizer: CameraOptimizer = self.config.camera_optimizer.setup(
             num_cameras=self.num_train_data, device="cpu"
@@ -596,13 +603,17 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]:
         # currently relies on the branch vickie/camera-grads
         self.camera_optimizer.apply_to_camera(camera)
         # get the background color
-        if renderers.BACKGROUND_COLOR_OVERRIDE is not None:
-            background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device)
-        else:
-            if self.training:
+
+        if self.training:
+            if self.config.background_color == "random":
                 background = torch.rand(3, device=self.device)
             else:
                 background = self.back_color.to(self.device)
+        else:
+            if renderers.BACKGROUND_COLOR_OVERRIDE is not None:
+                background = renderers.BACKGROUND_COLOR_OVERRIDE.to(self.device)
+            else:
+                background = self.back_color.to(self.device)
 
         if self.crop_box is not None and not self.training:
             crop_ids = self.crop_box.within(self.means).squeeze()

From 999e63c664019ff6d70f2fa637386a726d2158bf Mon Sep 17 00:00:00 2001
From: xucr
Date: Thu, 18 Jan 2024 18:43:44 -0800
Subject: [PATCH 27/38] fix back the config, bg color should work now

---
 nerfstudio/models/gaussian_splatting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index f7280d1831..f8a02dae58 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -107,7 +107,7 @@ class GaussianSplattingModelConfig(ModelConfig):
     _target: Type = field(default_factory=lambda: GaussianSplattingModel)
     warmup_length: int = 500
     """period of steps where refinement is turned off"""
-    refine_every: int = 150
+    refine_every: int = 100
     """period of steps where gaussians are culled and densified"""
     resolution_schedule: int = 250
     """training starts at 1/d resolution, every n steps this is doubled"""

From bae5a5f47df89076de7c5641c3410936df1ffd08 Mon Sep 17 00:00:00 2001
From: xucr
Date: Thu, 18 Jan 2024 18:51:25 -0800
Subject: [PATCH 28/38] removed camera optimizer for gs to align with main

---
 nerfstudio/models/gaussian_splatting.py | 266 ++++++++++++++++++------
 1 file changed, 208 insertions(+), 58 deletions(-)

diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py
index f8a02dae58..8092db00a0 100644
--- a/nerfstudio/models/gaussian_splatting.py
+++ b/nerfstudio/models/gaussian_splatting.py
@@ -35,7 +35,11 @@
 from nerfstudio.cameras.cameras import Cameras
 from nerfstudio.data.scene_box import OrientedBox
-from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation
+from nerfstudio.engine.callbacks import (
+    TrainingCallback,
+    TrainingCallbackAttributes,
+    TrainingCallbackLocation,
+)
 from nerfstudio.engine.optimizers import Optimizers
 
 # need following import for background color override
@@ -79,7 +83,9 @@ def SH2RGB(sh):
     return sh * C0 + 0.5
 
 
-def projection_matrix(znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"):
+def projection_matrix(
+    znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"
+):
     """
     Constructs an OpenGL-style perspective projection matrix.
     """
@@ -111,7 +117,7 @@ class GaussianSplattingModelConfig(ModelConfig):
     """period of steps where gaussians are culled and densified"""
     resolution_schedule: int = 250
     """training starts at 1/d resolution, every n steps this is doubled"""
-    background_color: Literal["random", "black", "white"] = "random"
+    background_color: Literal["random", "black", "white"] = "white"
     """Whether to randomize the background color."""
     num_downscales: int = 0
     """at the beginning, resolution is 1/2^d, where d is this number"""
@@ -196,9 +202,13 @@ def populate_modules(self):
             self.features_rest = torch.nn.Parameter(shs[:, 1:, :])
         else:
             self.features_dc = torch.nn.Parameter(torch.rand(self.num_points, 3))
-            self.features_rest = torch.nn.Parameter(torch.zeros((self.num_points, dim_sh - 1, 3)))
+            self.features_rest = torch.nn.Parameter(
+                torch.zeros((self.num_points, dim_sh - 1, 3))
+            )
 
-        self.opacities = torch.nn.Parameter(torch.logit(0.1 * torch.ones(self.num_points, 1)))
+        self.opacities = torch.nn.Parameter(
+            torch.logit(0.1 * torch.ones(self.num_points, 1))
+        )
 
         # metrics
         from torchmetrics.image import PeakSignalNoiseRatio
@@ -240,7 +250,9 @@ def load_state_dict(self, dict, **kwargs):  # type: ignore
         self.opacities = torch.nn.Parameter(torch.zeros(newp, 1, device=self.device))
         self.features_dc = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device))
         self.features_rest = torch.nn.Parameter(
-            torch.zeros(newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device)
+            torch.zeros(
+                newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device
+            )
         )
         super().load_state_dict(dict, **kwargs)
 
@@ -256,7 +268,9 @@ def k_nearest_sklearn(self, x: torch.Tensor, k: int):
         # Build the nearest neighbors model
         from sklearn.neighbors import NearestNeighbors
 
-        nn_model = NearestNeighbors(n_neighbors=k + 1, algorithm="auto", metric="euclidean").fit(x_np)
+        nn_model = NearestNeighbors(
+            n_neighbors=k + 1, algorithm="auto", metric="euclidean"
+        ).fit(x_np)
 
         # Find the k-nearest neighbors
         distances, indices = nn_model.kneighbors(x_np)
@@ -295,13 +309,20 @@ def dup_in_optim(self, optimizer, dup_mask, new_params, n=2):
         param_state = optimizer.state[param]
         repeat_dims = (n,) + tuple(1 for _ in range(param_state["exp_avg"].dim() - 1))
         param_state["exp_avg"] = torch.cat(
-            [param_state["exp_avg"], torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims)],
+            [
+                param_state["exp_avg"],
+                torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(
+                    *repeat_dims
+                ),
+            ],
             dim=0,
         )
         param_state["exp_avg_sq"] = torch.cat(
             [
                 param_state["exp_avg_sq"],
-                torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims),
+                torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(
+                    *repeat_dims
+                ),
             ],
             dim=0,
         )
@@ -332,14 +353,17 @@ def after_train(self, step: int):
             else:
                 assert self.vis_counts is not None
                 self.vis_counts[visible_mask] = self.vis_counts[visible_mask] + 1
-                self.xys_grad_norm[visible_mask] = grads[visible_mask] + self.xys_grad_norm[visible_mask]
+                self.xys_grad_norm[visible_mask] = (
+                    grads[visible_mask] + self.xys_grad_norm[visible_mask]
+                )
 
             # update the max screen size, as a ratio of number of pixels
             if self.max_2Dsize is None:
                 self.max_2Dsize = torch.zeros_like(self.radii, dtype=torch.float32)
             newradii = self.radii.detach()[visible_mask]
             self.max_2Dsize[visible_mask] = torch.maximum(
-                self.max_2Dsize[visible_mask], newradii / float(max(self.last_size[0], self.last_size[1]))
+                self.max_2Dsize[visible_mask],
+                newradii / float(max(self.last_size[0], self.last_size[1])),
             )
 
     def set_crop(self, crop_box: Optional[OrientedBox]):
@@ -361,16 +385,30 @@ def refinement_after(self, optimizers: Optimizers, step):
             reset_interval = self.config.reset_alpha_every * self.config.refine_every
             do_densification = (
                 self.step < self.config.stop_split_at
-                and self.step % reset_interval > self.num_train_data + self.config.refine_every
+                and self.step % reset_interval
+                > self.num_train_data + self.config.refine_every
             )
             if do_densification:
                 # then we densify
-                assert self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None
-                avg_grad_norm = (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1])
+                assert (
+                    self.xys_grad_norm is not None
+                    and self.vis_counts is not None
+                    and self.max_2Dsize is not None
+                )
+                avg_grad_norm = (
+                    (self.xys_grad_norm / self.vis_counts)
+                    * 0.5
+                    * max(self.last_size[0], self.last_size[1])
+                )
                 high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze()
-                splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze()
+                splits = (
+                    self.scales.exp().max(dim=-1).values
+                    > self.config.densify_size_thresh
+                ).squeeze()
                 if self.step < self.config.stop_screen_size_at:
-                    splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze()
+                    splits |= (
+                        self.max_2Dsize > self.config.split_screen_size
+                    ).squeeze()
                 splits &= high_grads
                 nsamps = self.config.n_split_samples
                 (
@@ -382,7 +420,10 @@ def refinement_after(self, optimizers: Optimizers, step):
                     split_quats,
                 ) = self.split_gaussians(splits, nsamps)
 
-                dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze()
+                dups = (
+                    self.scales.exp().max(dim=-1).values
+                    <= self.config.densify_size_thresh
+                ).squeeze()
                 dups &= high_grads
                 (
                     dup_means,
@@ -392,19 +433,43 @@ def refinement_after(self, optimizers: Optimizers, step):
                     dup_scales,
                     dup_quats,
                 ) = self.dup_gaussians(dups)
-                self.means = Parameter(torch.cat([self.means.detach(), split_means, dup_means], dim=0))
+                self.means = Parameter(
+                    torch.cat([self.means.detach(), split_means, dup_means], dim=0)
+                )
                 self.features_dc = Parameter(
-                    torch.cat([self.features_dc.detach(), split_features_dc, dup_features_dc], dim=0)
+                    torch.cat(
+                        [self.features_dc.detach(), split_features_dc, dup_features_dc],
+                        dim=0,
+                    )
                 )
                 self.features_rest = Parameter(
-                    torch.cat([self.features_rest.detach(), split_features_rest, dup_features_rest], dim=0)
+                    torch.cat(
+                        [
+                            self.features_rest.detach(),
+                            split_features_rest,
+                            dup_features_rest,
+                        ],
+                        dim=0,
+                    )
+                )
+                self.opacities = Parameter(
+                    torch.cat(
+                        [self.opacities.detach(), split_opacities, dup_opacities], dim=0
+                    )
+                )
+                self.scales = Parameter(
+                    torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0)
+                )
+                self.quats = Parameter(
+                    torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0)
                 )
-                self.opacities = Parameter(torch.cat([self.opacities.detach(), split_opacities, dup_opacities], dim=0))
-                self.scales = Parameter(torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0))
-                self.quats = Parameter(torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0))
                 # append zeros to the max_2Dsize tensor
                 self.max_2Dsize = torch.cat(
-                    [self.max_2Dsize, torch.zeros_like(split_scales[:, 0]), torch.zeros_like(dup_scales[:, 0])],
+                    [
+                        self.max_2Dsize,
+                        torch.zeros_like(split_scales[:, 0]),
+                        torch.zeros_like(dup_scales[:, 0]),
+                    ],
                     dim=0,
                 )
@@ -416,11 +481,21 @@ def refinement_after(self, optimizers: Optimizers, step):
                 # After a gaussian is split into two new gaussians, the original one should also be pruned.
                 splits_mask = torch.cat(
-                    (splits, torch.zeros(nsamps * splits.sum() + dups.sum(), device=self.device, dtype=torch.bool))
+                    (
+                        splits,
+                        torch.zeros(
+                            nsamps * splits.sum() + dups.sum(),
+                            device=self.device,
+                            dtype=torch.bool,
+                        ),
+                    )
                 )
 
                 deleted_mask = self.cull_gaussians(splits_mask)
-            elif self.step >= self.config.stop_split_at and self.config.continue_cull_post_densification:
+            elif (
+                self.step >= self.config.stop_split_at
+                and self.config.continue_cull_post_densification
+            ):
                 deleted_mask = self.cull_gaussians()
             else:
                 # if we do not allow culling post refinement, no more gaussians will be pruned.
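The hunk that follows reformats the periodic alpha reset. A small sketch of that
arithmetic, assuming the cull_alpha_thresh default of 0.1 shown earlier in this file:

    import torch

    cull_alpha_thresh = 0.1  # config default from this diff
    reset_value = cull_alpha_thresh * 2.0  # reset to twice the cull threshold
    cap = torch.logit(torch.tensor(reset_value))  # logit(0.2) = ln(0.25), about -1.386
    # Every reset interval, opacities.data is clamped to at most `cap`, so each
    # gaussian drops to <= 20% alpha and must regain opacity to survive future culls.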
@@ -429,11 +504,17 @@ def refinement_after(self, optimizers: Optimizers, step): if deleted_mask is not None: self.remove_from_all_optim(optimizers, deleted_mask) - if self.step < self.config.stop_split_at and self.step % reset_interval == self.config.refine_every: + if ( + self.step < self.config.stop_split_at + and self.step % reset_interval == self.config.refine_every + ): # Reset value is set to be twice of the cull_alpha_thresh reset_value = self.config.cull_alpha_thresh * 2.0 self.opacities.data = torch.clamp( - self.opacities.data, max=torch.logit(torch.tensor(reset_value, device=self.device)).item() + self.opacities.data, + max=torch.logit( + torch.tensor(reset_value, device=self.device) + ).item(), ) # reset the exp of optimizer optim = optimizers.optimizers["opacity"] @@ -453,18 +534,25 @@ def cull_gaussians(self, extra_cull_mask: Optional[torch.Tensor] = None): """ n_bef = self.num_points # cull transparent ones - culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze() + culls = ( + torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh + ).squeeze() below_alpha_count = torch.sum(culls).item() toobigs_count = 0 if extra_cull_mask is not None: culls = culls | extra_cull_mask if self.step > self.config.refine_every * self.config.reset_alpha_every: # cull huge ones - toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze() + toobigs = ( + torch.exp(self.scales).max(dim=-1).values + > self.config.cull_scale_thresh + ).squeeze() if self.step < self.config.stop_screen_size_at: # cull big screen space assert self.max_2Dsize is not None - toobigs = toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() + toobigs = ( + toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() + ) culls = culls | toobigs toobigs_count = torch.sum(toobigs).item() self.means = Parameter(self.means[~culls].detach()) @@ -487,12 +575,18 @@ def split_gaussians(self, split_mask, samps): """ n_splits = split_mask.sum().item() - CONSOLE.log(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}") - centered_samples = torch.randn((samps * n_splits, 3), device=self.device) # Nx3 of axis-aligned scales + CONSOLE.log( + f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}" + ) + centered_samples = torch.randn( + (samps * n_splits, 3), device=self.device + ) # Nx3 of axis-aligned scales scaled_samples = ( torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples ) # how these scales are rotated - quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True) # normalize them first + quats = self.quats[split_mask] / self.quats[split_mask].norm( + dim=-1, keepdim=True + ) # normalize them first rots = quat_to_rotmat(quats.repeat(samps, 1)) # how these scales are rotated rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze() new_means = rotated_samples + self.means[split_mask].repeat(samps, 1) @@ -503,25 +597,45 @@ def split_gaussians(self, split_mask, samps): new_opacities = self.opacities[split_mask].repeat(samps, 1) # step 4, sample new scales size_fac = 1.6 - new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1) - self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac) + new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat( + samps, 1 + ) + self.scales[split_mask] = torch.log( + torch.exp(self.scales[split_mask]) / 
size_fac + ) # step 5, sample new quats new_quats = self.quats[split_mask].repeat(samps, 1) - return new_means, new_features_dc, new_features_rest, new_opacities, new_scales, new_quats + return ( + new_means, + new_features_dc, + new_features_rest, + new_opacities, + new_scales, + new_quats, + ) def dup_gaussians(self, dup_mask): """ This function duplicates gaussians that are too small """ n_dups = dup_mask.sum().item() - CONSOLE.log(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}") + CONSOLE.log( + f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}" + ) dup_means = self.means[dup_mask] dup_features_dc = self.features_dc[dup_mask] dup_features_rest = self.features_rest[dup_mask] dup_opacities = self.opacities[dup_mask] dup_scales = self.scales[dup_mask] dup_quats = self.quats[dup_mask] - return dup_means, dup_features_dc, dup_features_rest, dup_opacities, dup_scales, dup_quats + return ( + dup_means, + dup_features_dc, + dup_features_rest, + dup_opacities, + dup_scales, + dup_quats, + ) @property def num_points(self): @@ -531,7 +645,11 @@ def get_training_callbacks( self, training_callback_attributes: TrainingCallbackAttributes ) -> List[TrainingCallback]: cbs = [] - cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb)) + cbs.append( + TrainingCallback( + [TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb + ) + ) # The order of these matters cbs.append( TrainingCallback( @@ -573,7 +691,13 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: def _get_downscale_factor(self): if self.training: - return 2 ** max((self.config.num_downscales - self.step // self.config.resolution_schedule), 0) + return 2 ** max( + ( + self.config.num_downscales + - self.step // self.config.resolution_schedule + ), + 0, + ) else: return 1 @@ -591,10 +715,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: print("Called get_outputs with not a camera") return {} assert camera.shape[0] == 1, "Only one camera at a time" - if self.training: - # currently relies on the branch vickie/camera-grads - self.camera_optimizer.apply_to_camera(camera) - + # get the background color if self.training: if self.config.background_color == "random": @@ -610,7 +731,11 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: - return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} + return { + "rgb": background.repeat( + int(camera.height.item()), int(camera.width.item()), 1 + ) + } else: crop_ids = None camera_downscale = self._get_downscale_factor() @@ -619,7 +744,9 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: R = camera.camera_to_worlds[0, :3, :3] # 3 x 3 T = camera.camera_to_worlds[0, :3, 3:4] # 3 x 1 # flip the z and y axes to align with gsplat conventions - R_edit = torch.diag(torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype)) + R_edit = torch.diag( + torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype) + ) R = R @ R_edit # analytic matrix inverse to get world2camera matrix R_inv = R.T @@ -657,7 +784,9 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: scales_crop = self.scales quats_crop = self.quats - colors_crop = torch.cat((features_dc_crop[:, None, :], features_rest_crop), 
dim=1) + colors_crop = torch.cat( + (features_dc_crop[:, None, :], features_rest_crop), dim=1 + ) self.xys, depths, self.radii, conics, num_tiles_hit, cov3d = project_gaussians( # type: ignore means_crop, @@ -675,14 +804,20 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: tile_bounds, ) # type: ignore if (self.radii).sum() == 0: - return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} + return { + "rgb": background.repeat( + int(camera.height.item()), int(camera.width.item()), 1 + ) + } # Important to allow xys grads to populate properly if self.training: self.xys.retain_grad() if self.config.sh_degree > 0: - viewdirs = means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] # (N, 3) + viewdirs = ( + means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] + ) # (N, 3) viewdirs = viewdirs / viewdirs.norm(dim=-1, keepdim=True) n = min(self.step // self.config.sh_degree_interval, self.config.sh_degree) rgbs = spherical_harmonics(n, viewdirs, colors_crop) @@ -721,7 +856,9 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: H, W, background=torch.ones(3, device=self.device) * 10, - )[..., 0:1] # type: ignore + )[ + ..., 0:1 + ] # type: ignore return {"rgb": rgb, "depth": depth_im} # type: ignore @@ -740,7 +877,9 @@ def get_gt_img(self, image: torch.Tensor): # torchvision can be slow to import, so we do it lazily. import torchvision.transforms.functional as TF - gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) + gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute( + 1, 2, 0 + ) else: gt_img = image return gt_img.to(self.device) @@ -760,7 +899,9 @@ def get_metrics_dict(self, outputs, batch) -> Dict[str, torch.Tensor]: metrics_dict["gaussian_count"] = self.num_points return metrics_dict - def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Tensor]: + def get_loss_dict( + self, outputs, batch, metrics_dict=None + ) -> Dict[str, torch.Tensor]: """Computes and returns the losses dict. 
Args:
@@ -770,12 +911,16 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te
         """
         gt_img = self.get_gt_img(batch["image"])
         Ll1 = torch.abs(gt_img - outputs["rgb"]).mean()
-        simloss = 1 - self.ssim(gt_img.permute(2, 0, 1)[None, ...], outputs["rgb"].permute(2, 0, 1)[None, ...])
+        simloss = 1 - self.ssim(
+            gt_img.permute(2, 0, 1)[None, ...],
+            outputs["rgb"].permute(2, 0, 1)[None, ...],
+        )
         if self.config.use_scale_regularization and self.step % 10 == 0:
             scale_exp = torch.exp(self.scales)
             scale_reg = (
                 torch.maximum(
-                    scale_exp.amax(dim=-1) / scale_exp.amin(dim=-1), torch.tensor(self.config.max_gauss_ratio)
+                    scale_exp.amax(dim=-1) / scale_exp.amin(dim=-1),
+                    torch.tensor(self.config.max_gauss_ratio),
                 )
                 - self.config.max_gauss_ratio
             )
@@ -784,12 +929,15 @@ def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Te
             scale_reg = torch.tensor(0.0).to(self.device)

         return {
-            "main_loss": (1 - self.config.ssim_lambda) * Ll1 + self.config.ssim_lambda * simloss,
+            "main_loss": (1 - self.config.ssim_lambda) * Ll1
+            + self.config.ssim_lambda * simloss,
             "scale_reg": scale_reg,
         }

     @torch.no_grad()
-    def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]:
+    def get_outputs_for_camera(
+        self, camera: Cameras, obb_box: Optional[OrientedBox] = None
+    ) -> Dict[str, torch.Tensor]:
         """Takes in a camera, generates the raybundle, and computes the output of the model.

         Overridden for a camera-based gaussian model.

@@ -822,7 +970,9 @@ def get_image_metrics_and_images(
             import torchvision.transforms.functional as TF

             newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d]
-            predicted_rgb = TF.resize(outputs["rgb"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0)
+            predicted_rgb = TF.resize(
+                outputs["rgb"].permute(2, 0, 1), newsize, antialias=None
+            ).permute(1, 2, 0)
         else:
             predicted_rgb = outputs["rgb"]

From 44581bf75ec084fb3d4e913acc99eb368e40ddc4 Mon Sep 17 00:00:00 2001
From: Vasu Agrawal
Date: Thu, 18 Jan 2024 23:44:44 -0500
Subject: [PATCH 29/38] Address feedback

---
 nerfstudio/process_data/process_data_utils.py |   8 +-
 nerfstudio/scripts/downloads/eyeful_tower.py  | 103 ++++++++++++++----
 2 files changed, 89 insertions(+), 22 deletions(-)

diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index 13682f90b7..91132fe4ac 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -58,16 +58,18 @@ class CameraModel(Enum):
 }


-def list_images(data: Path) -> List[Path]:
+def list_images(data: Path, recursive: bool = False) -> List[Path]:
     """Lists all supported images in a directory

     Args:
-        data: Path to the directory of images. Nested folders are searched as well.
+        data: Path to the directory of images.
+        recursive: Whether to search nested folders in `data`.
Returns: Paths to images contained in the directory """ allowed_exts = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + ALLOWED_RAW_EXTS - image_paths = sorted([p for p in data.glob("**/[!.]*") if p.suffix.lower() in allowed_exts]) + glob = "**/[!.]*" if recursive else "[!.]*" + image_paths = sorted([p for p in data.glob(glob) if p.suffix.lower() in allowed_exts]) return image_paths diff --git a/nerfstudio/scripts/downloads/eyeful_tower.py b/nerfstudio/scripts/downloads/eyeful_tower.py index 3bab323cc6..23fe080e68 100644 --- a/nerfstudio/scripts/downloads/eyeful_tower.py +++ b/nerfstudio/scripts/downloads/eyeful_tower.py @@ -26,6 +26,7 @@ import tyro from nerfstudio.scripts.downloads.utils import DatasetDownload +from nerfstudio.utils.rich_utils import CONSOLE eyefultower_downloads = [ "all", @@ -90,7 +91,23 @@ class EyefulTowerDownload(DatasetDownload): resolution_name: Tuple[EyefulTowerResolution, ...] = () @staticmethod - def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int): + def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, target_height: int) -> ET.ElementTree: + """Rescales parameters in metashape's cameras.xml format to match target width/height. + + The EyefulTower dataset provides images which have already been rescaled to smaller sizes from the original ~8K + resolution. However, the cameras.xml file provided, which contains the camera intrinsics in metashape's format, + only contains valid parameters for the original resolution. This function generates a new set of parameters + corresponding to a smaller resolution dataset by scaling the original values from cameras.xml. Non-uniform + scaling (different in X and Y) can be performed due to slight rounding differences. + + Args: + xml_tree: XML tree loaded from Metashape's cameras.xml file + target_width: Width of output images + target_height: Height of output images + + Returns: + Updated XML tree with scaled intrinsics and width/height parameters + """ transformed = copy.deepcopy(xml_tree) root = transformed.getroot() @@ -142,9 +159,34 @@ def scale_metashape_transform(xml_tree: ET.ElementTree, target_width: int, targe return transformed + @staticmethod def convert_cameras_to_nerfstudio_transforms( - self, capture_name: str, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str - ): + capture_name: str, cameras: dict, splits: dict, target_width: int, target_height: int, extension: str + ) -> dict: + """Converts EyefulTower cameras.json format to Nerfstudio's transforms.json format + + The EyefulTower dataset provides a cameras.json file containing geometric calibration information for the + original resolution ~8K images, similar to the cameras.xml file from Metashape. The main advantage is that data + is provided for each individual image, rather than being structured hierarchically with rig constraints (as in + the Metashape cameras.xml). + + This function takes the cameras.json file and converts it to the transforms.json Nerfstudio expects, with the + necessary scaling of intrinsics parameters applied. This function also handles the EyefulTower splits.json file, + describing the breakdown of training and validation images, and adds the appropriate fields to transforms.json. + This function works for both fisheye (V1) and pinhole (V2) cameras. Scene-specific fisheye mask radii are added + to the returned dictionary if needed. 
+
+        Args:
+            capture_name: Which specific EyefulTower capture is being converted
+            cameras: Data loaded from EyefulTower cameras.json
+            splits: Data loaded from EyefulTower splits.json
+            target_width: Width of output images
+            target_height: Height of output images
+            extension: Extension of output images
+
+        Returns:
+            Dict in the Nerfstudio transforms.json format, with scaled camera parameters, splits, and optional metadata.
+        """
         output = {}

         distortion_models = [c["distortionModel"] for c in cameras["KRT"]]
@@ -221,7 +263,17 @@ def convert_cameras_to_nerfstudio_transforms(
         output["val_filenames"] = split_filenames["test"]
         return output

-    def subsample_nerfstudio_transforms(self, transforms: dict, n: int):
+    @staticmethod
+    def subsample_nerfstudio_transforms(transforms: dict, n: int):
+        """Uniformly samples n frames from a Nerfstudio transforms.json dict.
+
+        Args:
+            transforms: Dictionary in Nerfstudio transforms.json format
+            n: Number of frames to uniformly subsample
+
+        Returns:
+            New transforms.json dict with n frames. All other parameters are copied.
+        """
         target = min(len(transforms["frames"]), n)
         indices = np.round(np.linspace(0, len(transforms["frames"]) - 1, target)).astype(int)
@@ -239,17 +291,29 @@ def subsample_nerfstudio_transforms(self, transforms: dict, n: int):

         return output

-    def download(self, save_dir: Path):
+    def download(self, save_dir: Path) -> None:
+        """Entrypoint to download the EyefulTower dataset.
+
+        * Fetches the specified dataset(s) at the specified resolution(s) from the EyefulTower AWS S3 bucket. Redundant
+          data is not downloaded, so this function can safely (and performantly) be called multiple times with
+          increasing scope of datasets and resolutions.
+        * Generates updated Metashape cameras.xml for lower resolution downloads.
+        * Generates Nerfstudio transforms.json for each resolution. Additionally generates transforms_300.json and
+          transforms_half.json containing subsets (300 frames, half the frames) of the full set to help with iteration.
+
+        Args:
+            save_dir: Directory to save dataset. Output will be in save_dir/eyefultower/
+        """
         if len(self.capture_name) == 0:
             self.capture_name = ("riverview",)
-            print(
+            CONSOLE.print(
                 f"No capture specified, using {self.capture_name} by default.",
                 "Add `--help` to this command to see all available captures.",
             )

         if len(self.resolution_name) == 0:
             self.resolution_name = ("jpeg_2k",)
-            print(
+            CONSOLE.print(
                 f"No resolution specified, using {self.resolution_name} by default.",
                 "Add `--help` to this command to see all available resolutions.",
             )
@@ -262,7 +326,7 @@ def download(self, save_dir: Path):
                 captures.add(capture)
         captures = sorted(captures)
         if len(captures) == 0:
-            print("WARNING: No EyefulTower captures specified. Nothing will be downloaded.")
+            CONSOLE.print("[bold yellow]WARNING: No EyefulTower captures specified. Nothing will be downloaded.")

         resolutions = set()
         for resolution in self.resolution_name:
@@ -272,7 +336,7 @@ def download(self, save_dir: Path):
                 resolutions.add(resolution)
         resolutions = sorted(resolutions)
         if len(resolutions) == 0:
-            print("WARNING: No EyefulTower resolutions specified. Nothing will be downloaded.")
+            CONSOLE.print("[bold yellow]WARNING: No EyefulTower resolutions specified. 
Nothing will be downloaded.") driver = awscli.clidriver.create_clidriver() @@ -287,9 +351,9 @@ def download(self, save_dir: Path): + includes + [base_url, str(output_path)] ) - print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'") + CONSOLE.print(f"[EyefulTower Capture {i+1: >2d}/{len(captures)}]: '{capture}'") print( - f" Downloading resolutions {resolutions}", + f"\tDownloading resolutions {resolutions}", f"to '{output_path.resolve()}' with command `aws {' '.join(command)}` ...", end=" ", flush=True, @@ -297,12 +361,13 @@ def download(self, save_dir: Path): driver.main(command) print("done!") - # After downloading, we'll insert an appropriate cameras.xml file into each directory - # It's quick enough that we can just redo it every time this is called, regardless - # of whether new data is downloaded. + # After downloading, we'll insert an appropriate cameras.xml file into each directory. It's quick enough + # that we can just redo it every time this is called, regardless of whether new data is downloaded. xml_input_path = output_path / "cameras.xml" if not xml_input_path.exists: - print(" WARNING: cameras.xml not found. Scaled cameras.xml will not be generated.") + CONSOLE.print( + "\t[bold yellow]WARNING: cameras.xml not found. Scaled cameras.xml will not be generated." + ) else: tree = ET.parse(output_path / "cameras.xml") @@ -310,7 +375,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] xml_output_path = output_path / metadata.folder_name / "cameras.xml" print( - f" Generating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ", + f"\tGenerating cameras.xml for '{resolution}' to '{xml_output_path.resolve()}' ... ", end=" ", flush=True, ) @@ -321,9 +386,9 @@ def download(self, save_dir: Path): json_input_path = output_path / "cameras.json" splits_input_path = output_path / "splits.json" if not json_input_path.exists: - print(" WARNING: cameras.json not found. transforms.json will not be generated.") + CONSOLE.print("\t[bold yellow]WARNING: cameras.json not found. transforms.json will not be generated.") elif not splits_input_path.exists: - print(" WARNING: splits.json not found. transforms.json will not be generated.") + CONSOLE.print("\t[bold yellow]WARNING: splits.json not found. transforms.json will not be generated.") else: with open(json_input_path, "r") as f: cameras = json.load(f) @@ -335,7 +400,7 @@ def download(self, save_dir: Path): metadata = eyefultower_resolutions[resolution] json_output_path = output_path / metadata.folder_name / "transforms.json" print( - f" Generating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... ", + f"\tGenerating transforms.json for '{resolution}' to '{json_output_path.resolve()}' ... 
", end=" ", flush=True, ) From 544cb81dd21648ff109f69b204e79d13dc7bf8ec Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 23:45:29 -0500 Subject: [PATCH 30/38] Revert changes to pyproject.toml, to be added in a later PR --- pyproject.toml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2b8f6d8059..2071141da6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,6 @@ classifiers = [ "Programming Language :: Python", ] dependencies = [ - "awscli>=1.31.10", "appdirs>=1.4", "av>=9.2.0", "comet_ml>=3.33.8", @@ -92,15 +91,12 @@ dev = [ "typeguard==2.13.3", "ruff==0.1.13", "sshconf==0.2.5", - # TODO(1480) enable when pycolmap windows wheels are available - # "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions + "pycolmap>=0.3.0", # NOTE: pycolmap==0.3.0 is not available on newer python versions "diffusers==0.16.1", "opencv-stubs==0.0.7", "transformers==4.29.2", "pyright==1.1.331", - # NOTE: Disabling projectaria-tools because it doesn't have prebuilt windows wheels - # Syntax comes from here: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ - "projectaria-tools>=1.3.1; sys_platform != 'win32'", + "projectaria_tools[all]>=1.2.0", ] # Documentation related packages From d1abe18e66eb465b90901ec94ca8bef26463af9a Mon Sep 17 00:00:00 2001 From: Vasu Agrawal Date: Thu, 18 Jan 2024 23:53:47 -0500 Subject: [PATCH 31/38] Oops, probably shouldn't have gotten rid of awscli ... --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index bb68bd4eb6..c35ea04788 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ classifiers = [ dependencies = [ "appdirs>=1.4", "av>=9.2.0", + "awscli>=1.31.10", "comet_ml>=3.33.8", "cryptography>=38", "tyro>=0.6.6", From be6792fa04c6c16c0d8c5e50b38ce61042949945 Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 21:08:09 -0800 Subject: [PATCH 32/38] adding support for bg color, tested and reformatted now --- nerfstudio/models/gaussian_splatting.py | 199 ++++++------------------ 1 file changed, 46 insertions(+), 153 deletions(-) diff --git a/nerfstudio/models/gaussian_splatting.py b/nerfstudio/models/gaussian_splatting.py index 8092db00a0..ae3b544225 100644 --- a/nerfstudio/models/gaussian_splatting.py +++ b/nerfstudio/models/gaussian_splatting.py @@ -22,7 +22,6 @@ import math from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple, Type, Union -from typing_extensions import Literal import numpy as np import torch @@ -32,14 +31,11 @@ from gsplat.sh import num_sh_bases, spherical_harmonics from pytorch_msssim import SSIM from torch.nn import Parameter +from typing_extensions import Literal from nerfstudio.cameras.cameras import Cameras from nerfstudio.data.scene_box import OrientedBox -from nerfstudio.engine.callbacks import ( - TrainingCallback, - TrainingCallbackAttributes, - TrainingCallbackLocation, -) +from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes, TrainingCallbackLocation from nerfstudio.engine.optimizers import Optimizers # need following import for background color override @@ -83,9 +79,7 @@ def SH2RGB(sh): return sh * C0 + 0.5 -def projection_matrix( - znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu" -): +def projection_matrix(znear, zfar, fovx, fovy, device: Union[str, torch.device] = "cpu"): """ Constructs an OpenGL-style perspective projection matrix. 
""" @@ -117,7 +111,7 @@ class GaussianSplattingModelConfig(ModelConfig): """period of steps where gaussians are culled and densified""" resolution_schedule: int = 250 """training starts at 1/d resolution, every n steps this is doubled""" - background_color: Literal["random", "black", "white"] = "white" + background_color: Literal["random", "black", "white"] = "random" """Whether to randomize the background color.""" num_downscales: int = 0 """at the beginning, resolution is 1/2^d, where d is this number""" @@ -202,13 +196,9 @@ def populate_modules(self): self.features_rest = torch.nn.Parameter(shs[:, 1:, :]) else: self.features_dc = torch.nn.Parameter(torch.rand(self.num_points, 3)) - self.features_rest = torch.nn.Parameter( - torch.zeros((self.num_points, dim_sh - 1, 3)) - ) + self.features_rest = torch.nn.Parameter(torch.zeros((self.num_points, dim_sh - 1, 3))) - self.opacities = torch.nn.Parameter( - torch.logit(0.1 * torch.ones(self.num_points, 1)) - ) + self.opacities = torch.nn.Parameter(torch.logit(0.1 * torch.ones(self.num_points, 1))) # metrics from torchmetrics.image import PeakSignalNoiseRatio @@ -250,9 +240,7 @@ def load_state_dict(self, dict, **kwargs): # type: ignore self.opacities = torch.nn.Parameter(torch.zeros(newp, 1, device=self.device)) self.features_dc = torch.nn.Parameter(torch.zeros(newp, 3, device=self.device)) self.features_rest = torch.nn.Parameter( - torch.zeros( - newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device - ) + torch.zeros(newp, num_sh_bases(self.config.sh_degree) - 1, 3, device=self.device) ) super().load_state_dict(dict, **kwargs) @@ -268,9 +256,7 @@ def k_nearest_sklearn(self, x: torch.Tensor, k: int): # Build the nearest neighbors model from sklearn.neighbors import NearestNeighbors - nn_model = NearestNeighbors( - n_neighbors=k + 1, algorithm="auto", metric="euclidean" - ).fit(x_np) + nn_model = NearestNeighbors(n_neighbors=k + 1, algorithm="auto", metric="euclidean").fit(x_np) # Find the k-nearest neighbors distances, indices = nn_model.kneighbors(x_np) @@ -311,18 +297,14 @@ def dup_in_optim(self, optimizer, dup_mask, new_params, n=2): param_state["exp_avg"] = torch.cat( [ param_state["exp_avg"], - torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat( - *repeat_dims - ), + torch.zeros_like(param_state["exp_avg"][dup_mask.squeeze()]).repeat(*repeat_dims), ], dim=0, ) param_state["exp_avg_sq"] = torch.cat( [ param_state["exp_avg_sq"], - torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat( - *repeat_dims - ), + torch.zeros_like(param_state["exp_avg_sq"][dup_mask.squeeze()]).repeat(*repeat_dims), ], dim=0, ) @@ -353,9 +335,7 @@ def after_train(self, step: int): else: assert self.vis_counts is not None self.vis_counts[visible_mask] = self.vis_counts[visible_mask] + 1 - self.xys_grad_norm[visible_mask] = ( - grads[visible_mask] + self.xys_grad_norm[visible_mask] - ) + self.xys_grad_norm[visible_mask] = grads[visible_mask] + self.xys_grad_norm[visible_mask] # update the max screen size, as a ratio of number of pixels if self.max_2Dsize is None: @@ -385,30 +365,16 @@ def refinement_after(self, optimizers: Optimizers, step): reset_interval = self.config.reset_alpha_every * self.config.refine_every do_densification = ( self.step < self.config.stop_split_at - and self.step % reset_interval - > self.num_train_data + self.config.refine_every + and self.step % reset_interval > self.num_train_data + self.config.refine_every ) if do_densification: # then we densify - assert ( - self.xys_grad_norm is not None - 
and self.vis_counts is not None - and self.max_2Dsize is not None - ) - avg_grad_norm = ( - (self.xys_grad_norm / self.vis_counts) - * 0.5 - * max(self.last_size[0], self.last_size[1]) - ) + assert self.xys_grad_norm is not None and self.vis_counts is not None and self.max_2Dsize is not None + avg_grad_norm = (self.xys_grad_norm / self.vis_counts) * 0.5 * max(self.last_size[0], self.last_size[1]) high_grads = (avg_grad_norm > self.config.densify_grad_thresh).squeeze() - splits = ( - self.scales.exp().max(dim=-1).values - > self.config.densify_size_thresh - ).squeeze() + splits = (self.scales.exp().max(dim=-1).values > self.config.densify_size_thresh).squeeze() if self.step < self.config.stop_screen_size_at: - splits |= ( - self.max_2Dsize > self.config.split_screen_size - ).squeeze() + splits |= (self.max_2Dsize > self.config.split_screen_size).squeeze() splits &= high_grads nsamps = self.config.n_split_samples ( @@ -420,10 +386,7 @@ def refinement_after(self, optimizers: Optimizers, step): split_quats, ) = self.split_gaussians(splits, nsamps) - dups = ( - self.scales.exp().max(dim=-1).values - <= self.config.densify_size_thresh - ).squeeze() + dups = (self.scales.exp().max(dim=-1).values <= self.config.densify_size_thresh).squeeze() dups &= high_grads ( dup_means, @@ -433,9 +396,7 @@ def refinement_after(self, optimizers: Optimizers, step): dup_scales, dup_quats, ) = self.dup_gaussians(dups) - self.means = Parameter( - torch.cat([self.means.detach(), split_means, dup_means], dim=0) - ) + self.means = Parameter(torch.cat([self.means.detach(), split_means, dup_means], dim=0)) self.features_dc = Parameter( torch.cat( [self.features_dc.detach(), split_features_dc, dup_features_dc], @@ -452,17 +413,9 @@ def refinement_after(self, optimizers: Optimizers, step): dim=0, ) ) - self.opacities = Parameter( - torch.cat( - [self.opacities.detach(), split_opacities, dup_opacities], dim=0 - ) - ) - self.scales = Parameter( - torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0) - ) - self.quats = Parameter( - torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0) - ) + self.opacities = Parameter(torch.cat([self.opacities.detach(), split_opacities, dup_opacities], dim=0)) + self.scales = Parameter(torch.cat([self.scales.detach(), split_scales, dup_scales], dim=0)) + self.quats = Parameter(torch.cat([self.quats.detach(), split_quats, dup_quats], dim=0)) # append zeros to the max_2Dsize tensor self.max_2Dsize = torch.cat( [ @@ -492,10 +445,7 @@ def refinement_after(self, optimizers: Optimizers, step): ) deleted_mask = self.cull_gaussians(splits_mask) - elif ( - self.step >= self.config.stop_split_at - and self.config.continue_cull_post_densification - ): + elif self.step >= self.config.stop_split_at and self.config.continue_cull_post_densification: deleted_mask = self.cull_gaussians() else: # if we donot allow culling post refinement, no more gaussians will be pruned. 
@@ -504,17 +454,12 @@ def refinement_after(self, optimizers: Optimizers, step): if deleted_mask is not None: self.remove_from_all_optim(optimizers, deleted_mask) - if ( - self.step < self.config.stop_split_at - and self.step % reset_interval == self.config.refine_every - ): + if self.step < self.config.stop_split_at and self.step % reset_interval == self.config.refine_every: # Reset value is set to be twice of the cull_alpha_thresh reset_value = self.config.cull_alpha_thresh * 2.0 self.opacities.data = torch.clamp( self.opacities.data, - max=torch.logit( - torch.tensor(reset_value, device=self.device) - ).item(), + max=torch.logit(torch.tensor(reset_value, device=self.device)).item(), ) # reset the exp of optimizer optim = optimizers.optimizers["opacity"] @@ -534,25 +479,18 @@ def cull_gaussians(self, extra_cull_mask: Optional[torch.Tensor] = None): """ n_bef = self.num_points # cull transparent ones - culls = ( - torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh - ).squeeze() + culls = (torch.sigmoid(self.opacities) < self.config.cull_alpha_thresh).squeeze() below_alpha_count = torch.sum(culls).item() toobigs_count = 0 if extra_cull_mask is not None: culls = culls | extra_cull_mask if self.step > self.config.refine_every * self.config.reset_alpha_every: # cull huge ones - toobigs = ( - torch.exp(self.scales).max(dim=-1).values - > self.config.cull_scale_thresh - ).squeeze() + toobigs = (torch.exp(self.scales).max(dim=-1).values > self.config.cull_scale_thresh).squeeze() if self.step < self.config.stop_screen_size_at: # cull big screen space assert self.max_2Dsize is not None - toobigs = ( - toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() - ) + toobigs = toobigs | (self.max_2Dsize > self.config.cull_screen_size).squeeze() culls = culls | toobigs toobigs_count = torch.sum(toobigs).item() self.means = Parameter(self.means[~culls].detach()) @@ -575,18 +513,12 @@ def split_gaussians(self, split_mask, samps): """ n_splits = split_mask.sum().item() - CONSOLE.log( - f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}" - ) - centered_samples = torch.randn( - (samps * n_splits, 3), device=self.device - ) # Nx3 of axis-aligned scales + CONSOLE.log(f"Splitting {split_mask.sum().item()/self.num_points} gaussians: {n_splits}/{self.num_points}") + centered_samples = torch.randn((samps * n_splits, 3), device=self.device) # Nx3 of axis-aligned scales scaled_samples = ( torch.exp(self.scales[split_mask].repeat(samps, 1)) * centered_samples ) # how these scales are rotated - quats = self.quats[split_mask] / self.quats[split_mask].norm( - dim=-1, keepdim=True - ) # normalize them first + quats = self.quats[split_mask] / self.quats[split_mask].norm(dim=-1, keepdim=True) # normalize them first rots = quat_to_rotmat(quats.repeat(samps, 1)) # how these scales are rotated rotated_samples = torch.bmm(rots, scaled_samples[..., None]).squeeze() new_means = rotated_samples + self.means[split_mask].repeat(samps, 1) @@ -597,12 +529,8 @@ def split_gaussians(self, split_mask, samps): new_opacities = self.opacities[split_mask].repeat(samps, 1) # step 4, sample new scales size_fac = 1.6 - new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat( - samps, 1 - ) - self.scales[split_mask] = torch.log( - torch.exp(self.scales[split_mask]) / size_fac - ) + new_scales = torch.log(torch.exp(self.scales[split_mask]) / size_fac).repeat(samps, 1) + self.scales[split_mask] = torch.log(torch.exp(self.scales[split_mask]) / size_fac) # step 5, 
sample new quats new_quats = self.quats[split_mask].repeat(samps, 1) return ( @@ -619,9 +547,7 @@ def dup_gaussians(self, dup_mask): This function duplicates gaussians that are too small """ n_dups = dup_mask.sum().item() - CONSOLE.log( - f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}" - ) + CONSOLE.log(f"Duplicating {dup_mask.sum().item()/self.num_points} gaussians: {n_dups}/{self.num_points}") dup_means = self.means[dup_mask] dup_features_dc = self.features_dc[dup_mask] dup_features_rest = self.features_rest[dup_mask] @@ -645,11 +571,7 @@ def get_training_callbacks( self, training_callback_attributes: TrainingCallbackAttributes ) -> List[TrainingCallback]: cbs = [] - cbs.append( - TrainingCallback( - [TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb - ) - ) + cbs.append(TrainingCallback([TrainingCallbackLocation.BEFORE_TRAIN_ITERATION], self.step_cb)) # The order of these matters cbs.append( TrainingCallback( @@ -692,10 +614,7 @@ def get_param_groups(self) -> Dict[str, List[Parameter]]: def _get_downscale_factor(self): if self.training: return 2 ** max( - ( - self.config.num_downscales - - self.step // self.config.resolution_schedule - ), + (self.config.num_downscales - self.step // self.config.resolution_schedule), 0, ) else: @@ -731,11 +650,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: if self.crop_box is not None and not self.training: crop_ids = self.crop_box.within(self.means).squeeze() if crop_ids.sum() == 0: - return { - "rgb": background.repeat( - int(camera.height.item()), int(camera.width.item()), 1 - ) - } + return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} else: crop_ids = None camera_downscale = self._get_downscale_factor() @@ -744,9 +659,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: R = camera.camera_to_worlds[0, :3, :3] # 3 x 3 T = camera.camera_to_worlds[0, :3, 3:4] # 3 x 1 # flip the z and y axes to align with gsplat conventions - R_edit = torch.diag( - torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype) - ) + R_edit = torch.diag(torch.tensor([1, -1, -1], device=self.device, dtype=R.dtype)) R = R @ R_edit # analytic matrix inverse to get world2camera matrix R_inv = R.T @@ -784,9 +697,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: scales_crop = self.scales quats_crop = self.quats - colors_crop = torch.cat( - (features_dc_crop[:, None, :], features_rest_crop), dim=1 - ) + colors_crop = torch.cat((features_dc_crop[:, None, :], features_rest_crop), dim=1) self.xys, depths, self.radii, conics, num_tiles_hit, cov3d = project_gaussians( # type: ignore means_crop, @@ -804,20 +715,14 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: tile_bounds, ) # type: ignore if (self.radii).sum() == 0: - return { - "rgb": background.repeat( - int(camera.height.item()), int(camera.width.item()), 1 - ) - } + return {"rgb": background.repeat(int(camera.height.item()), int(camera.width.item()), 1)} # Important to allow xys grads to populate properly if self.training: self.xys.retain_grad() if self.config.sh_degree > 0: - viewdirs = ( - means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] - ) # (N, 3) + viewdirs = means_crop.detach() - camera.camera_to_worlds.detach()[..., :3, 3] # (N, 3) viewdirs = viewdirs / viewdirs.norm(dim=-1, keepdim=True) n = min(self.step // self.config.sh_degree_interval, self.config.sh_degree) rgbs = 
spherical_harmonics(n, viewdirs, colors_crop) @@ -829,7 +734,6 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: camera.rescale_output_resolution(camera_downscale) assert (num_tiles_hit > 0).any() # type: ignore - rgb = rasterize_gaussians( # type: ignore self.xys, depths, @@ -856,9 +760,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: H, W, background=torch.ones(3, device=self.device) * 10, - )[ - ..., 0:1 - ] # type: ignore + )[..., 0:1] # type: ignore return {"rgb": rgb, "depth": depth_im} # type: ignore @@ -877,9 +779,7 @@ def get_gt_img(self, image: torch.Tensor): # torchvision can be slow to import, so we do it lazily. import torchvision.transforms.functional as TF - gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute( - 1, 2, 0 - ) + gt_img = TF.resize(image.permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) else: gt_img = image return gt_img.to(self.device) @@ -899,9 +799,7 @@ def get_metrics_dict(self, outputs, batch) -> Dict[str, torch.Tensor]: metrics_dict["gaussian_count"] = self.num_points return metrics_dict - def get_loss_dict( - self, outputs, batch, metrics_dict=None - ) -> Dict[str, torch.Tensor]: + def get_loss_dict(self, outputs, batch, metrics_dict=None) -> Dict[str, torch.Tensor]: """Computes and returns the losses dict. Args: @@ -929,15 +827,12 @@ def get_loss_dict( scale_reg = torch.tensor(0.0).to(self.device) return { - "main_loss": (1 - self.config.ssim_lambda) * Ll1 - + self.config.ssim_lambda * simloss, + "main_loss": (1 - self.config.ssim_lambda) * Ll1 + self.config.ssim_lambda * simloss, "scale_reg": scale_reg, } @torch.no_grad() - def get_outputs_for_camera( - self, camera: Cameras, obb_box: Optional[OrientedBox] = None - ) -> Dict[str, torch.Tensor]: + def get_outputs_for_camera(self, camera: Cameras, obb_box: Optional[OrientedBox] = None) -> Dict[str, torch.Tensor]: """Takes in a camera, generates the raybundle, and computes the output of the model. Overridden for a camera-based gaussian model. 
@@ -970,9 +865,7 @@ def get_image_metrics_and_images( import torchvision.transforms.functional as TF newsize = [batch["image"].shape[0] // d, batch["image"].shape[1] // d] - predicted_rgb = TF.resize( - outputs["rgb"].permute(2, 0, 1), newsize, antialias=None - ).permute(1, 2, 0) + predicted_rgb = TF.resize(outputs["rgb"].permute(2, 0, 1), newsize, antialias=None).permute(1, 2, 0) else: predicted_rgb = outputs["rgb"] From 8afc6495eed7015b54c6cca8b88f304090ea2b59 Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 21:12:44 -0800 Subject: [PATCH 33/38] formatted --- .../datamanagers/full_images_datamanager.py | 82 ++++++++++++++++--- 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index f67bd120f4..7158d72eb6 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -26,7 +26,20 @@ from dataclasses import dataclass, field from functools import cached_property from pathlib import Path -from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin +from typing import ( + Dict, + ForwardRef, + Generic, + List, + Literal, + Optional, + Tuple, + Type, + Union, + cast, + get_args, + get_origin, +) import cv2 import numpy as np @@ -36,7 +49,11 @@ from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion -from nerfstudio.data.datamanagers.base_datamanager import DataManager, DataManagerConfig, TDataset +from nerfstudio.data.datamanagers.base_datamanager import ( + DataManager, + DataManagerConfig, + TDataset, +) from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset @@ -106,7 +123,10 @@ def __init__( self.train_dataset = self.create_train_dataset() self.eval_dataset = self.create_eval_dataset() if len(self.train_dataset) > 500 and self.config.cache_images == "gpu": - CONSOLE.print("Train dataset has over 500 images, overriding cache_images to cpu", style="bold yellow") + CONSOLE.print( + "Train dataset has over 500 images, overriding cache_images to cpu", + style="bold yellow", + ) self.config.cache_images = "cpu" self.cached_train, self.cached_eval = self.cache_images(self.config.cache_images) self.exclude_batch_keys_from_device = self.train_dataset.exclude_batch_keys_from_device @@ -174,16 +194,36 @@ def cache_images(self, cache_images_option): elif camera.camera_type.item() == CameraType.FISHEYE.value: distortion_params = np.array( - [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]] + [ + distortion_params[0], + distortion_params[1], + distortion_params[2], + distortion_params[3], + ] ) newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify( - K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0 + K, + distortion_params, + (image.shape[1], image.shape[0]), + np.eye(3), + balance=0, ) map1, map2 = cv2.fisheye.initUndistortRectifyMap( - K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1 + K, + distortion_params, + np.eye(3), + newK, + (image.shape[1], image.shape[0]), + cv2.CV_32FC1, ) # and then remap: - image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, 
borderMode=cv2.BORDER_CONSTANT) + image = cv2.remap( + image, + map1, + map2, + interpolation=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + ) if "mask" in data: mask = data["mask"].numpy() mask = mask.astype(np.uint8) * 255 @@ -248,16 +288,36 @@ def cache_images(self, cache_images_option): elif camera.camera_type.item() == CameraType.FISHEYE.value: distortion_params = np.array( - [distortion_params[0], distortion_params[1], distortion_params[2], distortion_params[3]] + [ + distortion_params[0], + distortion_params[1], + distortion_params[2], + distortion_params[3], + ] ) newK = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify( - K, distortion_params, (image.shape[1], image.shape[0]), np.eye(3), balance=0 + K, + distortion_params, + (image.shape[1], image.shape[0]), + np.eye(3), + balance=0, ) map1, map2 = cv2.fisheye.initUndistortRectifyMap( - K, distortion_params, np.eye(3), newK, (image.shape[1], image.shape[0]), cv2.CV_32FC1 + K, + distortion_params, + np.eye(3), + newK, + (image.shape[1], image.shape[0]), + cv2.CV_32FC1, ) # and then remap: - image = cv2.remap(image, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT) + image = cv2.remap( + image, + map1, + map2, + interpolation=cv2.INTER_LINEAR, + borderMode=cv2.BORDER_CONSTANT, + ) if "mask" in data: mask = data["mask"].numpy() mask = mask.astype(np.uint8) * 255 From a10c777cff9873af60f59645f4ae9e60aaa9cb2b Mon Sep 17 00:00:00 2001 From: xucr Date: Thu, 18 Jan 2024 21:42:16 -0800 Subject: [PATCH 34/38] formatted --- .../datamanagers/full_images_datamanager.py | 21 ++----------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/nerfstudio/data/datamanagers/full_images_datamanager.py b/nerfstudio/data/datamanagers/full_images_datamanager.py index 7158d72eb6..a08a3bbf9f 100644 --- a/nerfstudio/data/datamanagers/full_images_datamanager.py +++ b/nerfstudio/data/datamanagers/full_images_datamanager.py @@ -26,20 +26,7 @@ from dataclasses import dataclass, field from functools import cached_property from pathlib import Path -from typing import ( - Dict, - ForwardRef, - Generic, - List, - Literal, - Optional, - Tuple, - Type, - Union, - cast, - get_args, - get_origin, -) +from typing import Dict, ForwardRef, Generic, List, Literal, Optional, Tuple, Type, Union, cast, get_args, get_origin import cv2 import numpy as np @@ -49,11 +36,7 @@ from nerfstudio.cameras.cameras import Cameras, CameraType from nerfstudio.configs.dataparser_configs import AnnotatedDataParserUnion -from nerfstudio.data.datamanagers.base_datamanager import ( - DataManager, - DataManagerConfig, - TDataset, -) +from nerfstudio.data.datamanagers.base_datamanager import DataManager, DataManagerConfig, TDataset from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs from nerfstudio.data.dataparsers.nerfstudio_dataparser import NerfstudioDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset From cdd1d1507e78c92fdf0c192a2b2739950dec7d7c Mon Sep 17 00:00:00 2001 From: Ethan Weber Date: Fri, 19 Jan 2024 19:51:54 +0000 Subject: [PATCH 35/38] changed glob variable name --- nerfstudio/process_data/process_data_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py index 91132fe4ac..40381b2173 100644 --- a/nerfstudio/process_data/process_data_utils.py +++ b/nerfstudio/process_data/process_data_utils.py @@ -68,8 +68,8 @@ def list_images(data: Path, recursive: bool = False) 
-> List[Path]: Paths to images contained in the directory """ allowed_exts = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + ALLOWED_RAW_EXTS - glob = "**/[!.]*" if recursive else "[!.]*" - image_paths = sorted([p for p in data.glob(glob) if p.suffix.lower() in allowed_exts]) + glob_str = "**/[!.]*" if recursive else "[!.]*" + image_paths = sorted([p for p in data.glob(glob_str) if p.suffix.lower() in allowed_exts]) return image_paths From 40c355307c7e73f89a590d062f54125965c28678 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Fri, 19 Jan 2024 17:04:48 -0800 Subject: [PATCH 36/38] Revert "Remove legacy viewer from GitHub actions (#2798)" This reverts commit 70d83d42011e5956b6e59eb313254a2fad67b37f. --- .github/workflows/viewer_build_deploy.yml | 97 +++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 .github/workflows/viewer_build_deploy.yml diff --git a/.github/workflows/viewer_build_deploy.yml b/.github/workflows/viewer_build_deploy.yml new file mode 100644 index 0000000000..da65c20ea1 --- /dev/null +++ b/.github/workflows/viewer_build_deploy.yml @@ -0,0 +1,97 @@ +name: Viewer Build and Deploy. + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + build: + name: Build + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./nerfstudio/viewer_legacy/app + steps: + - name: Checkout code + uses: actions/checkout@master + + - name: Install Node.js + uses: actions/setup-node@v3 + with: + node-version: 17.8.0 + cache: 'yarn' + cache-dependency-path: ./nerfstudio/viewer_legacy/app/yarn.lock + + - name: Install packages + run: yarn install + + - name: Build project + run: CI=false yarn build + + - name: Upload production-ready build files + uses: actions/upload-artifact@v2 + with: + name: production-files + path: ./nerfstudio/viewer_legacy/app/build + + deploy: + name: Deploy + needs: build + runs-on: ubuntu-latest + + env: + SSH_KEY: ${{secrets.SSH_KEY}} + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.8.12 + uses: actions/setup-python@v4 + with: + python-version: '3.8.12' + + - name: Install dependencies + run: | + pip install -r ./nerfstudio/viewer_legacy/app/requirements.txt + + - name: Download artifact + uses: actions/download-artifact@v2 + with: + name: production-files + path: ./nerfstudio/viewer_legacy/app/build + + - name: Get branch name (merge) + if: github.event_name != 'pull_request' + shell: bash + run: echo "BRANCH_NAME=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV + + - name: Get branch name (pull request) + if: github.event_name == 'pull_request' + shell: bash + run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV + + # TODO: detect file or scheme changes of the viewer_legacy and only + # increment the version.txt file when there is a change. + # Update the version.txt code and push to master when things change. 
+ # https://github.com/marketplace/actions/changed-files + # - name: Run changed-files with defaults on the dir1 + # id: changed-files-for-dir1 + # uses: tj-actions/changed-files@v29.0.3 + # with: + # path: nerfstudio/viewer_legacy/app + + # - name: List all added files in dir1 + # run: | + # for file in ${{ steps.changed-files-for-dir1.outputs.modified_files }}; do + # echo "$file was modified" + # done + + - run: | + python ./nerfstudio/viewer_legacy/app/run_deploy.py \ + --branch-name ${{ env.BRANCH_NAME }} \ + --ssh-key-string "$SSH_KEY" \ + --local-folder ./nerfstudio/viewer_legacy/app/build \ + --package-json-filename ./nerfstudio/viewer_legacy/app/package.json \ + --increment-version "False" + - run: cat ~/.ssh/config From 2ed1a71f2a2b50c23e96c3d2a1773b5979794b13 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Wed, 31 Jan 2024 10:02:36 -0800 Subject: [PATCH 37/38] replace alpha depth logic with torch.where for differentiability --- nerfstudio/models/splatfacto.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nerfstudio/models/splatfacto.py b/nerfstudio/models/splatfacto.py index 84000e73c5..b23eabe002 100644 --- a/nerfstudio/models/splatfacto.py +++ b/nerfstudio/models/splatfacto.py @@ -779,8 +779,7 @@ def get_outputs(self, camera: Cameras) -> Dict[str, Union[torch.Tensor, List]]: W, background=torch.zeros(3, device=self.device), )[..., 0:1] # type: ignore - depth_im[alpha > 0] = depth_im[alpha > 0] / alpha[alpha > 0] - depth_im[alpha == 0] = 1000 + depth_im = torch.where(alpha > 0, depth_im / alpha, depth_im.detach().max()) return {"rgb": rgb, "depth": depth_im, "accumulation": alpha} # type: ignore From 830f20bfa7b41bfde7adc562f6635ae1492e06b7 Mon Sep 17 00:00:00 2001 From: Justin Kerr Date: Wed, 31 Jan 2024 10:05:16 -0800 Subject: [PATCH 38/38] rm weird file --- .github/workflows/viewer_build_deploy.yml | 97 ----------------------- 1 file changed, 97 deletions(-) delete mode 100644 .github/workflows/viewer_build_deploy.yml diff --git a/.github/workflows/viewer_build_deploy.yml b/.github/workflows/viewer_build_deploy.yml deleted file mode 100644 index da65c20ea1..0000000000 --- a/.github/workflows/viewer_build_deploy.yml +++ /dev/null @@ -1,97 +0,0 @@ -name: Viewer Build and Deploy. 
- -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - build: - name: Build - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./nerfstudio/viewer_legacy/app - steps: - - name: Checkout code - uses: actions/checkout@master - - - name: Install Node.js - uses: actions/setup-node@v3 - with: - node-version: 17.8.0 - cache: 'yarn' - cache-dependency-path: ./nerfstudio/viewer_legacy/app/yarn.lock - - - name: Install packages - run: yarn install - - - name: Build project - run: CI=false yarn build - - - name: Upload production-ready build files - uses: actions/upload-artifact@v2 - with: - name: production-files - path: ./nerfstudio/viewer_legacy/app/build - - deploy: - name: Deploy - needs: build - runs-on: ubuntu-latest - - env: - SSH_KEY: ${{secrets.SSH_KEY}} - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8.12 - uses: actions/setup-python@v4 - with: - python-version: '3.8.12' - - - name: Install dependencies - run: | - pip install -r ./nerfstudio/viewer_legacy/app/requirements.txt - - - name: Download artifact - uses: actions/download-artifact@v2 - with: - name: production-files - path: ./nerfstudio/viewer_legacy/app/build - - - name: Get branch name (merge) - if: github.event_name != 'pull_request' - shell: bash - run: echo "BRANCH_NAME=$(echo ${GITHUB_REF#refs/heads/} | tr / -)" >> $GITHUB_ENV - - - name: Get branch name (pull request) - if: github.event_name == 'pull_request' - shell: bash - run: echo "BRANCH_NAME=$(echo ${GITHUB_HEAD_REF} | tr / -)" >> $GITHUB_ENV - - # TODO: detect file or scheme changes of the viewer_legacy and only - # increment the version.txt file when there is a change. - # Update the version.txt code and push to master when things change. - # https://github.com/marketplace/actions/changed-files - # - name: Run changed-files with defaults on the dir1 - # id: changed-files-for-dir1 - # uses: tj-actions/changed-files@v29.0.3 - # with: - # path: nerfstudio/viewer_legacy/app - - # - name: List all added files in dir1 - # run: | - # for file in ${{ steps.changed-files-for-dir1.outputs.modified_files }}; do - # echo "$file was modified" - # done - - - run: | - python ./nerfstudio/viewer_legacy/app/run_deploy.py \ - --branch-name ${{ env.BRANCH_NAME }} \ - --ssh-key-string "$SSH_KEY" \ - --local-folder ./nerfstudio/viewer_legacy/app/build \ - --package-json-filename ./nerfstudio/viewer_legacy/app/package.json \ - --increment-version "False" - - run: cat ~/.ssh/config
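For readers following the depth change in PATCH 37: below is a minimal, self-contained sketch (not part of the patch series) contrasting the removed in-place masked writes with the functional `torch.where` form, using toy tensors in place of rasterizer outputs. One deviation is flagged inline: the `safe_alpha` guard is an addition for this sketch, since `torch.where` evaluates both branches and a division by an exact zero in the untaken branch would poison the toy backward pass with nans; the patch itself divides by `alpha` directly.

import torch

# Toy stand-ins for the rasterizer outputs: per-pixel accumulated opacity
# and an unnormalized depth map.
alpha = torch.tensor([[0.0], [0.5], [1.0]])
raw_depth = torch.tensor([[0.0], [2.0], [3.0]], requires_grad=True)

# Removed pattern: in-place masked assignment. On a leaf tensor that
# requires grad (as here) the first write raises a RuntimeError, and even
# on non-leaf tensors in-place mutation can invalidate values autograd
# saved for the backward pass.
#   raw_depth[alpha > 0] = raw_depth[alpha > 0] / alpha[alpha > 0]
#   raw_depth[alpha == 0] = 1000

# New pattern: build the normalized depth functionally. Empty pixels take a
# detached fill value (the current max depth), so they receive zero gradient.
safe_alpha = torch.where(alpha > 0, alpha, torch.ones_like(alpha))  # sketch-only nan guard
depth = torch.where(alpha > 0, raw_depth / safe_alpha, raw_depth.detach().max())

depth.sum().backward()
print(raw_depth.grad)  # tensor([[0.], [2.], [1.]]): 1/alpha where alpha > 0, else 0

Gradients flow only through covered pixels, which matches the differentiability motivation stated in the commit message.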