nerfstudio-project · fbranschke · Jan 16, 2025 · Jan 16, 2025 · Jan 16, 2025 · Jan 20, 2025
diff --git a/examples/benchmarks/compression/mcmc.sh b/examples/benchmarks/compression/mcmc.sh
@@ -1,4 +1,4 @@
-SCENE_DIR="data/360_v2"
+SCENE_DIR="../data/360_v2"
 # eval all 9 scenes for benchmarking
 SCENE_LIST="garden bicycle stump bonsai counter kitchen room treehill flowers"
 
@@ -21,22 +21,16 @@ CAP_MAX=1000000
 
 for SCENE in $SCENE_LIST;
 do
-    if [ "$SCENE" = "bonsai" ] || [ "$SCENE" = "counter" ] || [ "$SCENE" = "kitchen" ] || [ "$SCENE" = "room" ]; then
-        DATA_FACTOR=2
-    else
-        DATA_FACTOR=4
-    fi
-
     echo "Running $SCENE"
 
     # train without eval
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor $DATA_FACTOR \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor -1 \
         --strategy.cap-max $CAP_MAX \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
 
     # eval: use vgg for lpips to align with other benchmarks
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor $DATA_FACTOR \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor -1 \
         --strategy.cap-max $CAP_MAX \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/ \
@@ -52,4 +46,4 @@ then
     python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
 else
     echo "zip command not found, skipping zipping"
-fi
+fi
diff --git a/examples/benchmarks/compression/mcmc_db.sh b/examples/benchmarks/compression/mcmc_db.sh
@@ -0,0 +1,49 @@
+SCENE_DIR="data/db"
+# eval all 2 scenes for benchmarking
+SCENE_LIST="playroom drjohnson"
+
+# # 0.36M GSs
+# RESULT_DIR="results/benchmark_db_mcmc_0_36M_png_compression"
+# CAP_MAX=360000
+
+# # 0.49M GSs
+# RESULT_DIR="results/benchmark_db_mcmc_0_49M_png_compression"
+# CAP_MAX=490000
+
+# 1M GSs
+RESULT_DIR="results/benchmark_db_mcmc_1M_png_compression"
+CAP_MAX=1000000
+
+# # 4M GSs
+# RESULT_DIR="results/benchmark_db_mcmc_4M_png_compression"
+# CAP_MAX=4000000
+
+for SCENE in $SCENE_LIST;
+do
+    echo "Running $SCENE"
+
+    # train without eval (--eval_steps -1); --opacity_reg 0.001 is passed to this training run only
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor -1 \
+        --strategy.cap-max $CAP_MAX \
+        --opacity_reg 0.001 \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/
+
+    # eval: load ckpt_29999_rank0.pt from the training run, enable png compression, and use vgg for lpips to align with other benchmarks
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor -1 \
+        --strategy.cap-max $CAP_MAX \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/ \
+        --lpips_net vgg \
+        --compression png \
+        --ckpt $RESULT_DIR/$SCENE/ckpts/ckpt_29999_rank0.pt
+done
+
+# Zip the compressed files and summarize the stats
+if command -v zip &> /dev/null
+then
+    echo "Zipping results"
+    python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
+else
+    echo "zip command not found, skipping zipping"
+fi
diff --git a/examples/benchmarks/compression/mcmc_syn.sh b/examples/benchmarks/compression/mcmc_syn.sh
@@ -0,0 +1,47 @@
+SCENE_DIR="data/nerf_synthetic"
+SCENE_LIST="chair drums ficus hotdog lego materials mic ship"
+
+# # 0.36M GSs
+# RESULT_DIR="results/benchmark_syn_mcmc_0_36M_png_compression"
+# CAP_MAX=360000
+
+# # 0.49M GSs
+# RESULT_DIR="results/benchmark_syn_mcmc_0_49M_png_compression"
+# CAP_MAX=490000
+
+# 1M GSs
+RESULT_DIR="results/benchmark_syn_mcmc_1M_png_compression"
+CAP_MAX=1000000
+
+# # 4M GSs
+# RESULT_DIR="results/benchmark_syn_mcmc_4M_png_compression"
+# CAP_MAX=4000000
+
+for SCENE in $SCENE_LIST;
+do
+    echo "Running $SCENE"
+
+    # train without eval (--eval_steps -1) at the original image resolution (--data_factor 1)
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor 1 \
+        --strategy.cap-max $CAP_MAX \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/
+
+    # eval: load ckpt_29999_rank0.pt from the training run, enable png compression, and use vgg for lpips to align with other benchmarks
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor 1 \
+        --strategy.cap-max $CAP_MAX \
+        --data_dir $SCENE_DIR/$SCENE/ \
+        --result_dir $RESULT_DIR/$SCENE/ \
+        --lpips_net vgg \
+        --compression png \
+        --ckpt $RESULT_DIR/$SCENE/ckpts/ckpt_29999_rank0.pt
+done
+
+# Zip the compressed files and summarize the stats
+if command -v zip &> /dev/null
+then
+    echo "Zipping results"
+    python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
+else
+    echo "zip command not found, skipping zipping"
+fi
diff --git a/examples/benchmarks/compression/mcmc_tt.sh b/examples/benchmarks/compression/mcmc_tt.sh
@@ -23,13 +23,13 @@ do
     echo "Running $SCENE"
 
     # train without eval
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor 1 \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --eval_steps -1 --disable_viewer --data_factor -1 \
         --strategy.cap-max $CAP_MAX \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/
 
     # eval: use vgg for lpips to align with other benchmarks
-    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor 1 \
+    CUDA_VISIBLE_DEVICES=0 python simple_trainer.py mcmc --disable_viewer --data_factor -1 \
         --strategy.cap-max $CAP_MAX \
         --data_dir $SCENE_DIR/$SCENE/ \
         --result_dir $RESULT_DIR/$SCENE/ \
@@ -45,4 +45,4 @@ then
     python benchmarks/compression/summarize_stats.py --results_dir $RESULT_DIR --scenes $SCENE_LIST
 else
     echo "zip command not found, skipping zipping"
-fi
+fi
diff --git a/examples/benchmarks/compression/results/DeepBlending.csv b/examples/benchmarks/compression/results/DeepBlending.csv
@@ -0,0 +1,5 @@
+Submethod,PSNR,SSIM,LPIPS,Size [Bytes],#Gaussians
+,29.417935371398926,0.8983585238456726,0.2698713690042496,6877574.0,360000
+,29.31465244293213,0.8991637229919434,0.2642294019460678,8770896.0,490000
+-1.00M,29.8878116607666,0.9035206139087677,0.25052621215581894,16123593.0,1000000
+,29.644909858703613,0.903471440076828,0.23753593116998672,58272202.5,4000000
diff --git a/examples/benchmarks/compression/results/MipNeRF360.csv b/examples/benchmarks/compression/results/MipNeRF360.csv
@@ -1,5 +1,5 @@
 Submethod,PSNR,SSIM,LPIPS,Size [Bytes],#Gaussians
-,26.64,0.788,0.270,6916294,360000
-,26.88,0.796,0.256,8796870,490000
--1.00M,27.29,0.811,0.229,16038022,1000000
-,27.70,0.825,0.197,57812682,4000000
+,26.694747077094185,0.786249836285909,0.27215222186512417,6912028.777777778,360000
+,26.92844581604004,0.7948330309655931,0.25781087908479905,8767350.777777778,490000
+-1.00M,27.329831653171116,0.8091025948524475,0.23095709085464478,16028623.0,1000000
+,27.795140160454643,0.8232156700558133,0.198530751797888,57659358.222222224,4000000
diff --git a/examples/benchmarks/compression/results/SyntheticNeRF.csv b/examples/benchmarks/compression/results/SyntheticNeRF.csv
@@ -0,0 +1,5 @@
+Submethod,PSNR,SSIM,LPIPS,Size [Bytes],#Gaussians
+,33.50586247444153,0.9701051115989685,0.030247553484514356,7316613.5,360000
+,33.5969717502594,0.9701937362551689,0.029584018629975617,9346813.75,490000
+-1.00M,33.724597454071045,0.9700975641608238,0.029100348940119147,17143747.375,1000000
+,33.32782459259033,0.968658909201622,0.029677038837689906,61105994.625,4000000
diff --git a/examples/benchmarks/compression/results/TanksAndTemples.csv b/examples/benchmarks/compression/results/TanksAndTemples.csv
@@ -1,5 +1,5 @@
 Submethod,PSNR,SSIM,LPIPS,Size [Bytes],#Gaussians
-,23.54,0.838,0.200,6875669,360000
-,23.62,0.845,0.188,8728572,490000
--1.00M,24.03,0.857,0.163,16100628,1000000
-,24.47,0.872,0.132,58239022,4000000
+,23.484140396118164,0.8359003365039825,0.20022188872098923,6814856.5,360000
+,23.68420124053955,0.8424293696880341,0.18749213218688965,8710374.5,490000
+-1.00M,23.996936798095703,0.855468362569809,0.16304801404476166,16065561.5,1000000
+,24.45703887939453,0.8690102994441986,0.13164417818188667,58291533.5,4000000
diff --git a/examples/datasets/colmap.py b/examples/datasets/colmap.py
@@ -28,7 +28,7 @@ def _get_rel_paths(path_dir: str) -> List[str]:
     return paths
 
 
-def _resize_image_folder(image_dir: str, resized_dir: str, factor: int) -> str:
+def _resize_image_folder(image_dir: str, resized_dir: str, factor: float) -> str:
     """Resize image folder."""
     print(f"Downscaling images by {factor}x from {image_dir} to {resized_dir}.")
     os.makedirs(resized_dir, exist_ok=True)
@@ -59,12 +59,11 @@ class Parser:
     def __init__(
         self,
         data_dir: str,
-        factor: int = 1,
+        factor: int = -1,
         normalize: bool = False,
         test_every: int = 8,
     ):
         self.data_dir = data_dir
-        self.factor = factor
         self.normalize = normalize
         self.test_every = test_every
 
@@ -104,7 +103,6 @@ def __init__(
             cam = manager.cameras[camera_id]
             fx, fy, cx, cy = cam.fx, cam.fy, cam.cx, cam.cy
             K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
-            K[:2, :] /= factor
             Ks_dict[camera_id] = K
 
             # Get distortion parameters.
@@ -132,7 +130,10 @@ def __init__(
             ), f"Only perspective and fisheye cameras are supported, got {type_}"
 
             params_dict[camera_id] = params
-            imsize_dict[camera_id] = (cam.width // factor, cam.height // factor)
+            imsize_dict[camera_id] = (
+                cam.width // abs(factor),
+                cam.height // abs(factor),
+            )
             mask_dict[camera_id] = None
         print(
             f"[Parser] {len(imdata)} images, taken by {len(set(camera_ids))} cameras."
@@ -195,9 +196,36 @@ def __init__(
                 colmap_image_dir, image_dir + "_png", factor=factor
             )
             image_files = sorted(_get_rel_paths(image_dir))
+
         colmap_to_image = dict(zip(colmap_files, image_files))
         image_paths = [os.path.join(image_dir, colmap_to_image[f]) for f in image_names]
 
+        # load one image to check the size.
+        actual_image = imageio.imread(image_paths[0])[..., :3]
+        actual_height, actual_width = actual_image.shape[:2]
+
+        # need to check image resolution, side length > 1600 should be downscaled
+        # based on https://github.com/graphdeco-inria/gaussian-splatting/blob/54c035f7834b564019656c3e3fcc3646292f727d/utils/camera_utils.py#L50
+        max_side = max(actual_width, actual_height)
+        global_down = max_side / 1600.0
+
+        if factor == -1 and max_side > 1600:
+            print(
+                "[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n "
+                "If this is not desired, please explicitly specify '--data_factor' as 1"
+            )
+            factor = global_down
+            image_dir = _resize_image_folder(
+                colmap_image_dir, image_dir + "_1600px", factor=factor
+            )
+            image_files = sorted(_get_rel_paths(image_dir))
+            colmap_to_image = dict(zip(colmap_files, image_files))
+            image_paths = [
+                os.path.join(image_dir, colmap_to_image[f]) for f in image_names
+            ]
+
+        self.factor = factor
+
         # 3D points and {image_name -> [point_idx]}
         points = manager.points3D.astype(np.float32)
         points_err = manager.point3D_errors.astype(np.float32)
@@ -242,18 +270,18 @@ def __init__(
         self.point_indices = point_indices  # Dict[str, np.ndarray], image_name -> [M,]
         self.transform = transform  # np.ndarray, (4, 4)
 
-        # load one image to check the size. In the case of tanksandtemples dataset, the
-        # intrinsics stored in COLMAP corresponds to 2x upsampled images.
-        actual_image = imageio.imread(self.image_paths[0])[..., :3]
-        actual_height, actual_width = actual_image.shape[:2]
         colmap_width, colmap_height = self.imsize_dict[self.camera_ids[0]]
         s_height, s_width = actual_height / colmap_height, actual_width / colmap_width
         for camera_id, K in self.Ks_dict.items():
+            K[:2, :] /= factor
             K[0, :] *= s_width
             K[1, :] *= s_height
             self.Ks_dict[camera_id] = K
             width, height = self.imsize_dict[camera_id]
-            self.imsize_dict[camera_id] = (int(width * s_width), int(height * s_height))
+            self.imsize_dict[camera_id] = (
+                int(width * s_width / global_down),
+                int(height * s_height / global_down),
+            )
 
         # undistortion
         self.mapx_dict = dict()