v0.0.4: Added updated documentation and demo script.
kylevedder committed Nov 15, 2023
1 parent 0cedf41 commit 2a34308
Showing 16 changed files with 929 additions and 46 deletions.
47 changes: 30 additions & 17 deletions README.md
@@ -1,33 +1,46 @@
# Bucketed Scene Flow Evaluation

A standardized dataloader plus eval protocol for various scene flow datasets.
A standardized dataloader plus eval protocol for scene flow datasets.

## Data Structures:
Currently supported datasets:

Located in `datastructures/scene_sequence.py`
- Argoverse 2
- Waymo Open

### `RawSceneSequence`
## Installation

`RawSceneSequence` describes the raw scene -- raw observations and their global frame poses.
```
pip install bucketed-scene-flow-eval
```

`RawSceneSequence` presents a map interface from `Timestamp` to `RawSceneItem`.
## Setup

### `QuerySceneSequence`
Follow our [Getting Started](docs/GETTING_STARTED.md) for setup instructions.

`QuerySceneSequence` is a self-contained description of:
## Demo

- the raw scene
- query particles
- the requested timestamps the prediction method should solve for
We provide a demo script that demonstrates the various features of the API.

Query particles consist of a series of particles, each associated with a particle ID, plus a single query timestamp. The query timestamp associates the particles with the requested timestamps. In principle these particles could lie at any point in the requested series, although datasets may provide stronger guarantees (e.g., scene flow datasets will place them at the first of two timestamps).
To run with Argoverse 2:

`QuerySceneSequence` presents a map interface from `ParticleID` to `Tuple[WorldParticle, Timestamp]`.
```
python scripts/demo.py --dataset Argoverse2SceneFlow --root_dir /efs/argoverse2/val/
```

### `EstimatedParticleTrajectories`
To run with Waymo Open:

`EstimatedParticleTrajectories` describes trajectories for every `ParticleID` over the given timestamps.
```
python scripts/demo.py --dataset WaymoOpenSceneFlow --root_dir /efs/waymo_open_processed_flow/validation/
```

### `EstimatedParticleTrajectories`
## Evaluating AV2 flow submissions

`EstimatedParticleTrajectories` describes trajectories for every `ParticleID` over the given timestamps, along with semantic class IDs for each particle.
To evaluate an AV2 Scene Flow challenge entry named `./submission_val.zip` against validation dataset masks `/efs/argoverse2/val_official_masks.zip`, run

```
python scripts/av2_eval.py /efs/argoverse2/val /efs/argoverse2/val_official_masks.zip ./submission_val.zip
```

## Documentation

See `docs/` for more documentation.
13 changes: 5 additions & 8 deletions bucketed_scene_flow_eval/datasets/__init__.py
@@ -2,18 +2,15 @@
from bucketed_scene_flow_eval.datasets.argoverse2 import Argoverse2SceneFlow
from bucketed_scene_flow_eval.datasets.waymoopen import WaymoOpenSceneFlow

__all__ = ["Argoverse2SceneFlow", "WaymoOpenSceneFlow"]
dataset_names = [cls.lower() for cls in __all__]
importable_classes = [Argoverse2SceneFlow, WaymoOpenSceneFlow]
name_to_class_lookup = {cls.__name__.lower(): cls for cls in importable_classes}



def construct_dataset(name: str, args: dict):
name = name.lower()
all_lookup: Dict[str, str] = {cls.lower(): cls for cls in __all__}
if name not in all_lookup:
if name not in name_to_class_lookup:
raise ValueError(f"Unknown dataset name: {name}")

cls_name = all_lookup[name]
# Convert cls_name string to class object using getattr
print("Importing: ", __import__(__name__), cls_name)
cls = getattr(__import__(__name__), cls_name)
cls = name_to_class_lookup[name]
return cls(**args)
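
For reference, a minimal sketch of the simplified lookup in use. The `root_dir` keyword is an assumption inferred from the demo script's `--root_dir` flag; name matching is case-insensitive:

```python
from bucketed_scene_flow_eval.datasets import construct_dataset

# Dataset names are matched case-insensitively via name_to_class_lookup.
dataset = construct_dataset("Argoverse2SceneFlow",
                            {"root_dir": "/efs/argoverse2/val"})
print(type(dataset).__name__)  # Argoverse2SceneFlow
```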
@@ -139,6 +139,13 @@ def _load_camera_ego_pose(self, sensor_name: str) -> SE3:
tz = params["tz_m"]
rotation = self._quat_to_mat(qw, qx, qy, qz)
translation = np.array([tx, ty, tz])

coordinate_transform_matrix = np.array([[ 0, -1, 0], # noqa
[ 0, 0, -1], # noqa
[ 1, 0, 0]]) # noqa
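# Assumed intent: the raw pose stores the camera rotation in the computer-vision
# convention (+Z forward, +X right, +Y down), and this fixed rotation
# re-expresses it in the +X-forward camera frame that CameraProjection expects.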

rotation = rotation @ coordinate_transform_matrix

return SE3(rotation_matrix=rotation, translation=translation)

def _load_ground_height_raster(self):
@@ -260,6 +267,8 @@ def _load_rgb(self, idx) -> RGBImage:
# Read the image, keep the same color space
raw_img = cv2.imread(str(rgb_path), cv2.IMREAD_UNCHANGED).astype(
np.float32) / 255.0
# Convert from CV2 standard BGR to RGB
raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
return RGBImage(raw_img)

def _load_pose(self, idx) -> SE3:
55 changes: 46 additions & 9 deletions bucketed_scene_flow_eval/datastructures/camera_projection.py
@@ -1,6 +1,8 @@
from enum import Enum
import numpy as np

from typing import Tuple
from .rgb_image import RGBImage
from .pointcloud import PointCloud

class CameraModel(Enum):
PINHOLE = 1
@@ -23,6 +25,19 @@ def __init__(self, fx: float, fy: float, cx: float, cy: float,
self.cy = cy
self.camera_model = camera_model

def __repr__(self) -> str:
return f"CameraProjection(fx={self.fx}, fy={self.fy}, cx={self.cx}, cy={self.cy}, camera_model={self.camera_model})"

def image_to_image_plane_pc(self, image : RGBImage, depth : float = 1.0) -> Tuple[PointCloud, np.ndarray]:
# Make pixel coordinate grid
image_shape = image.image.shape[:2]
image_coordinates = np.stack(np.meshgrid(np.arange(image_shape[1]), np.arange(image_shape[0])), axis=2).astype(np.float32).reshape(-1, 2)
image_coordinate_depths = np.ones((len(image_coordinates), 1)) * depth

resulting_points = self.to_camera(image_coordinates, image_coordinate_depths)
colors = image.image.reshape(-1, 3)
return PointCloud(resulting_points), colors

def _camera_to_view_coordinates(self, camera_points: np.ndarray):
assert len(camera_points.shape) == 2, \
f"camera_points must have shape (N, 3), got {camera_points.shape}"
@@ -49,24 +64,24 @@ def _view_to_camera_coordinates(self, view_points: np.ndarray):

return view_points @ view_T_camera

def to_pixels(self, camera_points: np.ndarray):
def view_frame_to_pixels(self, view_points: np.ndarray):
"""
Input: camera_frame_ego_points of shape (N, 3)
Expects the camera frame ego points to be in right hand coordinates, with
the camera looking down the positive X axis.
Expects the view frame ego points to be in sensor coordinates, with
the sensor looking down the positive Z axis, positive X being right,
and positive Y being down.
Output: image_points of shape (N, 2)
The image frame is defined as follows:
0,0 is the top left corner
"""
assert len(camera_points.shape) == 2, \
f"camera_points must have shape (N, 3), got {camera_points.shape}"
assert camera_points.shape[1] == 3, \
f"camera_points must have shape (N, 3), got {camera_points.shape}"

view_points = self._camera_to_view_coordinates(camera_points)
assert len(view_points.shape) == 2, \
f"view_points must have shape (N, 3), got {view_points.shape}"
assert view_points.shape[1] == 3, \
f"view_points must have shape (N, 3), got {view_points.shape}"

K = np.array([
[self.fx, 0, self.cx],
@@ -79,6 +94,28 @@ def to_pixels(self, camera_points: np.ndarray):

return pixel_points_2d

def camera_frame_to_pixels(self, camera_points: np.ndarray):
"""
Input: camera_frame_ego_points of shape (N, 3)
Expects the camera frame ego points to be in right hand coordinates, with
the camera looking down the positive X axis.
Output: image_points of shape (N, 2)
The image frame is defined as follows:
0,0 is the top left corner
"""
assert len(camera_points.shape) == 2, \
f"camera_points must have shape (N, 3), got {camera_points.shape}"
assert camera_points.shape[1] == 3, \
f"camera_points must have shape (N, 3), got {camera_points.shape}"

view_points = self._camera_to_view_coordinates(camera_points)
return self.view_frame_to_pixels(view_points)



def to_camera(self, pixel_coordinates, pixel_coordinate_depths):
"""
Input: pixel_coordinates of shape (N, 2)
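
To make the new projection helpers concrete, a minimal usage sketch. The intrinsics values are placeholders, and importing `CameraModel` and `RGBImage` from their defining modules is an assumption about the package layout:

```python
import numpy as np

from bucketed_scene_flow_eval.datastructures import CameraProjection
from bucketed_scene_flow_eval.datastructures.camera_projection import CameraModel
from bucketed_scene_flow_eval.datastructures.rgb_image import RGBImage

# Placeholder pinhole intrinsics.
projection = CameraProjection(fx=500.0, fy=500.0, cx=320.0, cy=240.0,
                              camera_model=CameraModel.PINHOLE)

# Lift every pixel of an (H, W, 3) image onto a plane one unit from the camera.
image = RGBImage(np.zeros((480, 640, 3), dtype=np.float32))
plane_pc, colors = projection.image_to_image_plane_pc(image, depth=1.0)

# Project +X-forward camera-frame points to pixel coordinates; a point
# straight ahead should land at the principal point (cx, cy).
camera_points = np.array([[2.0, 0.0, 0.0]])
pixels = projection.camera_frame_to_pixels(camera_points)
```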
14 changes: 9 additions & 5 deletions bucketed_scene_flow_eval/datastructures/o3d_visualizer.py
@@ -1,6 +1,6 @@
import open3d as o3d
from bucketed_scene_flow_eval.datastructures import PointCloud, SE3
from typing import Tuple, List, Dict, Union
from typing import Tuple, List, Dict, Union, Optional
import numpy as np


@@ -36,11 +36,17 @@ def add_pc_frame(self,
def add_pointcloud(self,
pc: PointCloud,
pose: SE3 = SE3.identity(),
color: Union[Tuple[float, float, float], None] = None):
color: Optional[Union[np.ndarray, Tuple[float, float, float], List[Tuple[float, float, float]]]] = None):
pc = pc.transform(pose)
pc = pc.to_o3d()
if color is not None:
pc = pc.paint_uniform_color(color)
color = np.array(color)
if color.ndim == 1:
pc = pc.paint_uniform_color(color)
elif color.ndim == 2:
assert len(color) == len(
pc.points), f"Expected color to have length {len(pc.points)}, got {len(color)} instead"
pc.colors = o3d.utility.Vector3dVector(color)
self.add_geometry(pc)

def add_sphere(self, location: np.ndarray, radius: float,
@@ -128,5 +134,3 @@ def run(self):
ctr.set_lookat([0, 0, 0])
self.vis.run()

def destroy(self):
self.vis.destroy_window()
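
A short sketch of the extended color handling. The class name `O3DVisualizer` and its no-argument constructor are assumptions; the diff shows only its methods:

```python
import numpy as np

from bucketed_scene_flow_eval.datastructures import PointCloud
from bucketed_scene_flow_eval.datastructures.o3d_visualizer import O3DVisualizer  # assumed name

points = np.random.rand(100, 3)
per_point_colors = np.random.rand(100, 3)  # one RGB triple in [0, 1] per point

vis = O3DVisualizer()
vis.add_pointcloud(PointCloud(points), color=(1.0, 0.0, 0.0))   # uniform color
vis.add_pointcloud(PointCloud(points), color=per_point_colors)  # per-point colors
vis.run()
```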
9 changes: 5 additions & 4 deletions bucketed_scene_flow_eval/datastructures/pointcloud.py
@@ -1,6 +1,7 @@
import numpy as np
import open3d as o3d
from bucketed_scene_flow_eval.datastructures import SE3, CameraProjection
from typing import Optional
from bucketed_scene_flow_eval.datastructures import SE3


def to_fixed_array(array: np.ndarray,
@@ -85,7 +86,7 @@ def __getitem__(self, idx):

@staticmethod
def from_depth_image(depth: np.ndarray,
camera_projection: CameraProjection) -> 'PointCloud':
camera_projection: 'CameraProjection') -> 'PointCloud':
assert depth.ndim == 2, f'depth must be a 2D array, got {depth.ndim}'
image_coordinates = make_image_pixel_coordinate_grid(depth.shape)
image_coordinate_depths = depth.reshape(-1, 1)
@@ -98,7 +99,7 @@ def from_depth_image(depth: np.ndarray,
@staticmethod
def from_points_and_depth(
image_coordinates: np.ndarray, image_coordinate_depths: np.ndarray,
camera_projection: CameraProjection) -> 'PointCloud':
camera_projection: 'CameraProjection') -> 'PointCloud':
return PointCloud(
camera_projection.to_camera(image_coordinates,
image_coordinate_depths))
@@ -188,5 +189,5 @@ def within_region(self, x_min, x_max, y_min, y_max, z_min,
def shape(self) -> tuple:
return self.points.shape

def to_o3d(self):
def to_o3d(self) -> o3d.geometry.PointCloud:
return o3d.geometry.PointCloud(o3d.utility.Vector3dVector(self.points))
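
And a quick sketch of the depth-image constructors this file exposes (placeholder intrinsics; the `CameraModel` import path is an assumption):

```python
import numpy as np

from bucketed_scene_flow_eval.datastructures import CameraProjection, PointCloud
from bucketed_scene_flow_eval.datastructures.camera_projection import CameraModel

projection = CameraProjection(fx=500.0, fy=500.0, cx=320.0, cy=240.0,
                              camera_model=CameraModel.PINHOLE)

# Back-project an (H, W) depth image into a 3D point cloud.
depth = np.ones((480, 640), dtype=np.float32)
pc = PointCloud.from_depth_image(depth, projection)

# Hand off to Open3D for visualization.
o3d_pc = pc.to_o3d()
```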
4 changes: 4 additions & 0 deletions build.sh
@@ -0,0 +1,4 @@
#!/bin/bash
rm -rf ./dist/
python3 -m build
python3 -m twine upload --repository pypi dist/*