diff --git a/README.md b/README.md index f945249..c9ec662 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Currently supported datasets: - Argoverse 2 (Human Labeled and [NSFP Pseudolabeled](https://github.com/kylevedder/BucketedSceneFlowEval/blob/master/docs/GETTING_STARTED.md#argoverse-2-nsfp-pseudolabels-new)) - Waymo Open (LiDAR only) + - NuScenes (LiDAR only, beta) If you use this repository as part of a publication, please cite: diff --git a/bucketed_scene_flow_eval/datasets/__init__.py b/bucketed_scene_flow_eval/datasets/__init__.py index 8007086..66dc8c0 100644 --- a/bucketed_scene_flow_eval/datasets/__init__.py +++ b/bucketed_scene_flow_eval/datasets/__init__.py @@ -6,11 +6,18 @@ WaymoOpenCausalSceneFlow, WaymoOpenNonCausalSceneFlow, ) + +from bucketed_scene_flow_eval.datasets.nuscenes import ( + NuScenesCausalSceneFlow, + NuScenesNonCausalSceneFlow, +) from bucketed_scene_flow_eval.interfaces import AbstractDataset importable_classes = [ Argoverse2CausalSceneFlow, Argoverse2NonCausalSceneFlow, + NuScenesCausalSceneFlow, + NuScenesNonCausalSceneFlow, WaymoOpenCausalSceneFlow, WaymoOpenNonCausalSceneFlow, ] diff --git a/bucketed_scene_flow_eval/datasets/argoverse2/__init__.py b/bucketed_scene_flow_eval/datasets/argoverse2/__init__.py index bb41a8e..d973aa7 100644 --- a/bucketed_scene_flow_eval/datasets/argoverse2/__init__.py +++ b/bucketed_scene_flow_eval/datasets/argoverse2/__init__.py @@ -1,8 +1,6 @@ from .argoverse_raw_data import ( - DEFAULT_POINT_CLOUD_RANGE, ArgoverseRawSequence, ArgoverseRawSequenceLoader, - PointCloudRange, ) from .argoverse_scene_flow import ( ArgoverseNoFlowSequence, diff --git a/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_raw_data.py b/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_raw_data.py index 6c37d4d..723c1af 100644 --- a/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_raw_data.py +++ b/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_raw_data.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from pathlib import Path -from typing import Any, Optional +from typing import Optional import cv2 import numpy as np @@ -196,7 +196,7 @@ def __init__( expected_camera_shape=(1550, 2048, 3), ): self.log_id = log_id - self.POINT_CLOUD_RANGE = point_cloud_range + self.point_cloud_range = point_cloud_range self.dataset_dir = Path(dataset_dir) assert self.dataset_dir.is_dir(), f"dataset_dir {dataset_dir} does not exist" @@ -418,14 +418,14 @@ def is_ground_points(self, global_point_cloud: PointCloud) -> np.ndarray: return is_ground_boolean_arr def is_in_range(self, global_point_cloud: PointCloud) -> MaskArray: - if self.POINT_CLOUD_RANGE is None: + if self.point_cloud_range is None: return np.ones(len(global_point_cloud), dtype=bool) - xmin = self.POINT_CLOUD_RANGE[0] - ymin = self.POINT_CLOUD_RANGE[1] - zmin = self.POINT_CLOUD_RANGE[2] - xmax = self.POINT_CLOUD_RANGE[3] - ymax = self.POINT_CLOUD_RANGE[4] - zmax = self.POINT_CLOUD_RANGE[5] + xmin = self.point_cloud_range[0] + ymin = self.point_cloud_range[1] + zmin = self.point_cloud_range[2] + xmax = self.point_cloud_range[3] + ymax = self.point_cloud_range[4] + zmax = self.point_cloud_range[5] return global_point_cloud.within_region_mask(xmin, xmax, ymin, ymax, zmin, zmax) def __repr__(self) -> str: diff --git a/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_scene_flow.py b/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_scene_flow.py index c99b030..96fad5e 100644 --- 
a/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_scene_flow.py +++ b/bucketed_scene_flow_eval/datasets/argoverse2/argoverse_scene_flow.py @@ -1,5 +1,3 @@ -import copy -import dataclasses from pathlib import Path from typing import Optional, Union @@ -9,7 +7,6 @@ EgoLidarFlow, MaskArray, PointCloud, - PointCloudFrame, SemanticClassId, SemanticClassIdArray, SupervisedPointCloudFrame, @@ -24,7 +21,7 @@ ) from bucketed_scene_flow_eval.utils.loaders import load_feather -from . import DEFAULT_POINT_CLOUD_RANGE, ArgoverseRawSequence, PointCloudRange +from bucketed_scene_flow_eval.datasets.argoverse2.argoverse_raw_data import DEFAULT_POINT_CLOUD_RANGE, ArgoverseRawSequence, PointCloudRange CATEGORY_MAP = { -1: "BACKGROUND", diff --git a/bucketed_scene_flow_eval/datasets/argoverse2/dataset.py b/bucketed_scene_flow_eval/datasets/argoverse2/dataset.py index 71fd4b2..7cf0eb4 100644 --- a/bucketed_scene_flow_eval/datasets/argoverse2/dataset.py +++ b/bucketed_scene_flow_eval/datasets/argoverse2/dataset.py @@ -1,6 +1,6 @@ import copy from pathlib import Path -from typing import Any, Optional, Union +from typing import Optional, Union from bucketed_scene_flow_eval.datastructures import * from bucketed_scene_flow_eval.eval import ( @@ -9,7 +9,6 @@ ThreeWayEPEEvaluator, ) from bucketed_scene_flow_eval.interfaces import ( - AbstractAVLidarSequence, CausalSeqLoaderDataset, EvalType, NonCausalSeqLoaderDataset, diff --git a/bucketed_scene_flow_eval/datasets/nuscenes/__init__.py b/bucketed_scene_flow_eval/datasets/nuscenes/__init__.py index 9e3d93c..206aed2 100644 --- a/bucketed_scene_flow_eval/datasets/nuscenes/__init__.py +++ b/bucketed_scene_flow_eval/datasets/nuscenes/__init__.py @@ -1 +1,22 @@ -from .nuscenes_loader import NuScenesLoader, NuScenesSequence +from .nuscenes_raw_data import ( + NuScenesRawSequence, + NuScenesRawSequenceLoader, +) +from .nuscenes_scene_flow import ( + NuScenesNoFlowSequence, + NuScenesNoFlowSequenceLoader, + NuScenesSceneFlowSequence, + NuScenesSceneFlowSequenceLoader +) +from .dataset import NuScenesCausalSceneFlow, NuScenesNonCausalSceneFlow + +__all__ = [ + "NuScenesCausalSceneFlow", + "NuScenesNonCausalSceneFlow", + "NuScenesNoFlowSequence", + "NuScenesNoFlowSequenceLoader", + "NuScenesRawSequence", + "NuScenesRawSequenceLoader", + "NuScenesSceneFlowSequence", + "NuScenesSceneFlowSequenceLoader", +] diff --git a/bucketed_scene_flow_eval/datasets/nuscenes/dataset.py b/bucketed_scene_flow_eval/datasets/nuscenes/dataset.py new file mode 100644 index 0000000..31313c6 --- /dev/null +++ b/bucketed_scene_flow_eval/datasets/nuscenes/dataset.py @@ -0,0 +1,134 @@ +import copy +from pathlib import Path +from typing import Optional, Union + +from bucketed_scene_flow_eval.datastructures import * +from bucketed_scene_flow_eval.eval import ( + BucketedEPEEvaluator, + Evaluator, + ThreeWayEPEEvaluator, +) +from bucketed_scene_flow_eval.interfaces import ( + CausalSeqLoaderDataset, + EvalType, + NonCausalSeqLoaderDataset, +) + +from bucketed_scene_flow_eval.datasets.argoverse2.argoverse_raw_data import DEFAULT_POINT_CLOUD_RANGE, PointCloudRange +from .nuscenes_scene_flow import ( + CATEGORY_MAP, + NuScenesNoFlowSequenceLoader, + NuScenesSceneFlowSequenceLoader, +) +from .nuscenes_metacategories import BUCKETED_METACATAGORIES, THREEWAY_EPE_METACATAGORIES + + +def _make_evaluator(eval_type: EvalType, eval_args: dict) -> Evaluator: + eval_args_copy = copy.deepcopy(eval_args) + # Builds the evaluator object for this dataset. 
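+ # Defaults are filled in only when the caller has not supplied these keys, + # so any user-provided eval_args entries take precedence.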
+ if eval_type == EvalType.BUCKETED_EPE: + if "meta_class_lookup" not in eval_args_copy: + eval_args_copy["meta_class_lookup"] = BUCKETED_METACATAGORIES + if "class_id_to_name" not in eval_args_copy: + eval_args_copy["class_id_to_name"] = CATEGORY_MAP + return BucketedEPEEvaluator(**eval_args_copy) + elif eval_type == EvalType.THREEWAY_EPE: + if "meta_class_lookup" not in eval_args_copy: + eval_args_copy["meta_class_lookup"] = THREEWAY_EPE_METACATAGORIES + if "class_id_to_name" not in eval_args_copy: + eval_args_copy["class_id_to_name"] = CATEGORY_MAP + return ThreeWayEPEEvaluator(**eval_args_copy) + else: + raise ValueError(f"Unknown eval type {eval_type}") + + +class NuScenesCausalSceneFlow(CausalSeqLoaderDataset): + def __init__( + self, + root_dir: Union[Path, list[Path]], + nuscenes_version: str, + subsequence_length: int = 2, + with_ground: bool = True, + with_rgb: bool = False, + cache_root: Path = Path("/tmp/"), + use_gt_flow: bool = True, + flow_data_path: Optional[Union[Path, list[Path]]] = None, + eval_type: str = "bucketed_epe", + eval_args=dict(), + expected_camera_shape: tuple[int, int, int] = (1550, 2048, 3), + point_cloud_range: Optional[PointCloudRange] = DEFAULT_POINT_CLOUD_RANGE, + use_cache=True, + load_flow: bool = True, + ) -> None: + if load_flow: + self.sequence_loader = NuScenesSceneFlowSequenceLoader( + raw_data_path=root_dir, + nuscenes_version=nuscenes_version, + with_rgb=with_rgb, + use_gt_flow=use_gt_flow, + flow_data_path=flow_data_path, + expected_camera_shape=expected_camera_shape, + point_cloud_range=point_cloud_range, + ) + else: + self.sequence_loader = NuScenesNoFlowSequenceLoader( + raw_data_path=root_dir, + nuscenes_version=nuscenes_version, + with_rgb=with_rgb, + expected_camera_shape=expected_camera_shape, + point_cloud_range=point_cloud_range, + ) + super().__init__( + sequence_loader=self.sequence_loader, + subsequence_length=subsequence_length, + with_ground=with_ground, + idx_lookup_cache_root=cache_root, + eval_type=eval_type, + eval_args=eval_args, + use_cache=use_cache, + ) + + def evaluator(self) -> Evaluator: + return _make_evaluator(self.eval_type, self.eval_args) + + +class NuScenesNonCausalSceneFlow(NonCausalSeqLoaderDataset): + def __init__( + self, + root_dir: Union[Path, list[Path]], + nuscenes_version: str = "v1.0-mini", + subsequence_length: int = 2, + with_ground: bool = True, + with_rgb: bool = False, + cache_root: Path = Path("/tmp/"), + use_gt_flow: bool = True, + flow_data_path: Optional[Union[Path, list[Path]]] = None, + eval_type: str = "bucketed_epe", + eval_args=dict(), + expected_camera_shape: tuple[int, int, int] = (1550, 2048, 3), + point_cloud_range: Optional[PointCloudRange] = DEFAULT_POINT_CLOUD_RANGE, + use_cache=True, + load_flow: bool = True, + ) -> None: + if load_flow: + self.sequence_loader = NuScenesSceneFlowSequenceLoader( + root_dir, + nuscenes_version=nuscenes_version, + with_rgb=with_rgb, + use_gt_flow=use_gt_flow, + flow_data_path=flow_data_path, + expected_camera_shape=expected_camera_shape, + point_cloud_range=point_cloud_range, + ) + else: + self.sequence_loader = NuScenesNoFlowSequenceLoader( + root_dir, + nuscenes_version=nuscenes_version, + with_rgb=with_rgb, + expected_camera_shape=expected_camera_shape, + point_cloud_range=point_cloud_range, + ) + super().__init__( + sequence_loader=self.sequence_loader, + subsequence_length=subsequence_length, + with_ground=with_ground, + idx_lookup_cache_root=cache_root, + eval_type=eval_type, + eval_args=eval_args, + use_cache=use_cache, + ) + + def evaluator(self) -> Evaluator: + return _make_evaluator(self.eval_type, self.eval_args) diff --git a/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_metacategories.py b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_metacategories.py new file mode 100644 index
0000000..03ea973 --- /dev/null +++ b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_metacategories.py @@ -0,0 +1,48 @@ +BACKGROUND_CATEGORIES = ["background"] + +# These categories are ignored because of labeling oddities +STATIC_OBJECTS = [ + "movable_object.barrier", + "movable_object.debris", + "movable_object.pushable_pullable", + "movable_object.trafficcone", + "static_object.bicycle_rack", +] + +PEDESTRIAN_CATEGORIES = [ + "animal", + "human.pedestrian.adult", + "human.pedestrian.child", + "human.pedestrian.construction_worker", + "human.pedestrian.personal_mobility", + "human.pedestrian.police_officer", + "human.pedestrian.stroller", + "human.pedestrian.wheelchair", +] + +WHEELED_VRU = ["vehicle.bicycle", "vehicle.motorcycle"] + +CAR = ["vehicle.car"] + +OTHER_VEHICLES = [ + "vehicle.bus.bendy", + "vehicle.bus.rigid", + "vehicle.construction", + "vehicle.emergency.ambulance", + "vehicle.emergency.police", + "vehicle.trailer", + "vehicle.truck", +] + +BUCKETED_METACATAGORIES = { + "BACKGROUND": BACKGROUND_CATEGORIES, + "CAR": CAR, + "PEDESTRIAN": PEDESTRIAN_CATEGORIES, + "WHEELED_VRU": WHEELED_VRU, + "OTHER_VEHICLES": OTHER_VEHICLES, +} + +THREEWAY_EPE_METACATAGORIES = { + "BACKGROUND": BACKGROUND_CATEGORIES, + "FOREGROUND": PEDESTRIAN_CATEGORIES + WHEELED_VRU + CAR + OTHER_VEHICLES, +} diff --git a/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_loader.py b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_raw_data.py similarity index 55% rename from bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_loader.py rename to bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_raw_data.py index 35affb7..a52d7cb 100644 --- a/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_loader.py +++ b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_raw_data.py @@ -1,14 +1,18 @@ -import time from dataclasses import dataclass from pathlib import Path -from typing import Any, Optional, Union +from typing import Union import numpy as np -from nuscenes.nuscenes import NuScenes +from numpy.typing import NDArray from nuscenes.utils.data_classes import LidarPointCloud as NuscLidarPointCloud from PIL import Image from pyquaternion import Quaternion +import open3d as o3d +from bucketed_scene_flow_eval.datasets.argoverse2.argoverse_raw_data import ( + PointCloudRange, + DEFAULT_POINT_CLOUD_RANGE, +) from bucketed_scene_flow_eval.datastructures import ( SE3, CameraModel, @@ -20,11 +24,12 @@ RGBFrameLookup, RGBImage, TimeSyncedAVLidarData, - TimeSyncedBaseAuxilaryData, TimeSyncedRawFrame, ) from bucketed_scene_flow_eval.interfaces import AbstractSequence, CachedSequenceLoader +from .nuscenes_utils import NuScenesWithInstanceBoxes + NuscDict = dict[str, Union[str, int, list]] NuscSample = dict[str, NuscDict] NuscSampleData = NuscDict @@ -33,20 +38,26 @@ import matplotlib.pyplot as plt +MaskArray = NDArray[np.bool_] + @dataclass class NuScenesSyncedSampleData: cam_front: NuscSampleData lidar_top: NuscSampleData - def _load_pointcloud(self, nusc: NuScenes, pointsensor: NuscSampleData) -> PointCloud: + def _load_pointcloud( + self, nusc: NuScenesWithInstanceBoxes, pointsensor: NuscSampleData + ) -> PointCloud: pc_path = Path(nusc.dataroot) / pointsensor["filename"] # type: ignore assert pc_path.is_file(), f"pointcloud file {pc_path} does not exist" nusc_lidar_pc = NuscLidarPointCloud.from_file(str(pc_path.absolute())) xyz_points = nusc_lidar_pc.points[:3].T return PointCloud(xyz_points) - def _load_poseinfo(self, nusc: NuScenes, sample_data: NuscSampleData) -> PoseInfo: + def _load_poseinfo( + self,
nusc: NuScenesWithInstanceBoxes, sample_data: NuscSampleData + ) -> PoseInfo: # Load Sensor to Ego sensor: NuscCalibratedSensor = nusc.get( "calibrated_sensor", sample_data["calibrated_sensor_token"] @@ -58,7 +69,7 @@ def _load_poseinfo(self, nusc: NuScenes, sample_data: NuscSampleData) -> PoseInf rotation_matrix=sensor_to_ego_rotation.rotation_matrix, ) - # Load Ego to World + # Load Ego to World (map) poserecord: NuscEgoPose = nusc.get("ego_pose", sample_data["ego_pose_token"]) ego_to_world_translation = np.array(poserecord["translation"]) ego_to_world_rotation = Quaternion(poserecord["rotation"]) @@ -72,7 +83,7 @@ def _load_poseinfo(self, nusc: NuScenes, sample_data: NuscSampleData) -> PoseInf ego_to_global=ego_to_world_se3, ) - def _load_rgb(self, nusc: NuScenes, sample_data: NuscSampleData) -> RGBImage: + def _load_rgb(self, nusc: NuScenesWithInstanceBoxes, sample_data: NuscSampleData) -> RGBImage: data_path = nusc.get_sample_data_path(sample_data["token"]) data = Image.open(data_path) np_data_uint8 = np.array(data) @@ -80,7 +91,7 @@ def _load_rgb(self, nusc: NuScenes, sample_data: NuscSampleData) -> RGBImage: return RGBImage(np_data_float32) def _load_camera_projection( - self, nusc: NuScenes, sample_data: NuscSampleData + self, nusc: NuScenesWithInstanceBoxes, sample_data: NuscSampleData ) -> CameraProjection: sensor: NuscCalibratedSensor = nusc.get( "calibrated_sensor", sample_data["calibrated_sensor_token"] @@ -98,20 +109,20 @@ def _load_camera_projection( camera_model=CameraModel.PINHOLE, ) - def lidar_to_pc_frame(self, nusc: NuScenes) -> PointCloudFrame: + def lidar_to_pc_frame(self, nusc: NuScenesWithInstanceBoxes) -> PointCloudFrame: pc = self._load_pointcloud(nusc, self.lidar_top) pose_info = self._load_poseinfo(nusc, self.lidar_top) mask = np.ones(pc.points.shape[0], dtype=bool) return PointCloudFrame(pc, pose_info, mask) - def camera_to_rgb_frame_lookup(self, nusc: NuScenes) -> RGBFrameLookup: + def camera_to_rgb_frame_lookup(self, nusc: NuScenesWithInstanceBoxes) -> RGBFrameLookup: rgb = self._load_rgb(nusc, self.cam_front) pose_info = self._load_poseinfo(nusc, self.cam_front) camera_projection = self._load_camera_projection(nusc, self.cam_front) cam_front_rgb_frame = RGBFrame(rgb, pose_info, camera_projection) return RGBFrameLookup({"cam_front": cam_front_rgb_frame}, ["cam_front"]) - def cam_front_with_points(self, nusc: NuScenes): + def cam_front_with_points(self, nusc: NuScenesWithInstanceBoxes): pointsensor_token = self.lidar_top["token"] camera_token = self.cam_front["token"] points, depth, im = nusc.explorer.map_pointcloud_to_image( @@ -123,12 +134,10 @@ def cam_front_with_points(self, nusc: NuScenes): plt.scatter(points[0, :], points[1, :], c=depth, s=1) plt.show() - breakpoint() - -class NuScenesSequence(AbstractSequence): +class NuScenesRawSequence(AbstractSequence): """ - Argoverse Raw Sequence. + NuScenes Raw Sequence. Every sequence is a collection of frames. 
Unfortunately, RGB cameras are not perfectly synced with the lidar frames, so we have to pull the most recent RGB image as a first order @@ -137,22 +146,25 @@ class NuScenesSequence(AbstractSequence): def __init__( self, - nusc: NuScenes, + nusc: NuScenesWithInstanceBoxes, log_id: str, scene_info: NuscDict, verbose: bool = False, + with_rgb: bool = False, + point_cloud_range: PointCloudRange | None = DEFAULT_POINT_CLOUD_RANGE, ): self.nusc = nusc self.scene_info = scene_info self.log_id = log_id - - sensor_to_sample_data_table = self._sensor_to_sample_data_table( + self.point_cloud_range = point_cloud_range + self.with_rgb = with_rgb + self.sensor_to_sample_data_table = self._sensor_to_sample_data_table( scene_info, ["CAM_FRONT", "LIDAR_TOP"] ) self.synced_sensors = self._sync_sensors( - sensor_to_sample_data_table["CAM_FRONT"], - sensor_to_sample_data_table["LIDAR_TOP"], + self.sensor_to_sample_data_table["CAM_FRONT"], + self.sensor_to_sample_data_table["LIDAR_TOP"], ) def _sync_sensors( @@ -216,7 +228,7 @@ def _load_cam_data(self, cam_data: NuscSampleData) -> RGBImage: # Render the camera self.nusc.render_sample_data(cam_data["token"]) - # Load the calibarted sensor + # Load the calibrated sensor sensor: NuscCalibratedSensor = self.nusc.get( "calibrated_sensor", cam_data["calibrated_sensor_token"] ) @@ -228,14 +240,93 @@ def _load_cam_data(self, cam_data: NuscSampleData) -> RGBImage: np_data_float32 = np_data_uint8.astype(np.float32) / 255.0 return RGBImage(np_data_float32) + def _compute_ground_mask(self, pc_frame: PointCloudFrame) -> MaskArray: + """Given a point cloud frame, compute the ground plane. + + Args: + pc_frame: a PointCloudFrame object + Returns: + is_ground_mask: true if the index is part of the ground plane, false otherwise + """ + # Consider only points with Z values close to 0; otherwise RANSAC may produce planes that are not actually on the ground + points_close_to_ground_mask = pc_frame.full_ego_pc.within_region_mask( + -1000, 1000, -1000, 1000, -2, 0.5 + ) + points_close_to_ground = pc_frame.full_ego_pc.mask_points(points_close_to_ground_mask) + # Open3D's RANSAC returns a list of indices (in the input point cloud) of inliers in the plane + o3d.utility.random.seed( + 42 + ) # We set the seed so that the ground plane extraction is deterministic + _, ground_plane_inliers = points_close_to_ground.to_o3d().segment_plane( + distance_threshold=0.2, ransac_n=3, num_iterations=100, probability=1.0 + ) + # Since we passed in only the points close to the ground to the RANSAC segmentation, the indices are with respect to points_close_to_ground and not the original point cloud + # In order to get the indices of the points in the ground plane with respect to the original point cloud we must first convert the indices to a mask + actual_ground_in_points_close_to_ground_mask = np.zeros( + (points_close_to_ground.shape[0]), dtype=bool + ) + actual_ground_in_points_close_to_ground_mask[ground_plane_inliers] = ( + 1 # Convert indices to a binary mask of points_close_to_ground + ) + # Create another mask to represent the final ground mask with respect to the full point cloud + is_ground_mask = np.zeros_like( + points_close_to_ground_mask, dtype=bool + ) # This has the same shape as the original point cloud + # This line does all the heavy lifting. is_ground_mask[points_close_to_ground_mask] takes a mask with the same size as the original point cloud + # and crops it to only the indices that were in our Z-height cropped pc.
Then we set those indices to be equal to their value in the inlier mask from RANSAC. + is_ground_mask[points_close_to_ground_mask] = actual_ground_in_points_close_to_ground_mask + return is_ground_mask + + def _in_range_mask(self, global_point_cloud: PointCloud) -> MaskArray: + if self.point_cloud_range is None: + return np.ones(len(global_point_cloud), dtype=bool) + xmin = self.point_cloud_range[0] + ymin = self.point_cloud_range[1] + zmin = self.point_cloud_range[2] + xmax = self.point_cloud_range[3] + ymax = self.point_cloud_range[4] + zmax = self.point_cloud_range[5] + return global_point_cloud.within_region_mask(xmin, xmax, ymin, ymax, zmin, zmax) + + def _load_pose(self, idx: int) -> PoseInfo: + """Returns the PoseInfo object for the LiDAR data at a given index. Importantly, there is NO RELATIVE INDEX for this function. + + Thus the "global" transformation contains the EGO -> MAP FRAME transformation. + """ + sample = self.synced_sensors[idx] + return sample._load_poseinfo(self.nusc, sample.lidar_top) + def load( - self, idx: int, relative_to_idx: int - ) -> tuple[TimeSyncedRawFrame, TimeSyncedBaseAuxilaryData]: + self, + idx: int, + relative_to_idx: int, + ) -> tuple[TimeSyncedRawFrame, TimeSyncedAVLidarData]: + """The load function loads a sample at some index, relative to another. + + Concretely, the nuScenes API convention is that there are 3 frames: the sensor frame (origin = the sensor), the ego frame (origin = the vehicle), and the global frame (origin = some point on the map). + For the purposes of scene flow, we want our "global frame" origin to be the ego pose of a different sample in the sequence. + We abuse the terminology a bit and temporarily store the ego -> map transform in the "global" field of the PoseInfo object. + + To summarize: when the sample is loaded from the nuScenes API using `lidar_to_pc_frame`, global = map. + But after this load function is called, we replace the value such that global = the ego pose at the relative idx.
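+ + In code terms: after load(idx, relative_to_idx=k), pc_frame.pose.ego_to_global holds ego_pose_k.inverse().compose(ego_pose_idx), so the ego pose at index k becomes the identity of the shared "global" frame.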
+ + Args: + idx: the sample to load + relative_to_idx: the origin of the sequence; all point clouds can be represented in a frame relative to this one + """ + assert 0 <= idx < len(self), f"idx must be in range [0, {len(self)}), got {idx}" synced_sample = self.synced_sensors[idx] pc_frame = synced_sample.lidar_to_pc_frame(self.nusc) - rgb_frames = synced_sample.camera_to_rgb_frame_lookup(self.nusc) + if self.with_rgb: + rgb_frames = synced_sample.camera_to_rgb_frame_lookup(self.nusc) + else: + rgb_frames = RGBFrameLookup({}, []) + + start_ego_pose = self._load_pose(relative_to_idx).ego_to_global + relative_pose = start_ego_pose.inverse().compose(pc_frame.pose.ego_to_global) + pc_frame.pose.ego_to_global = relative_pose return ( TimeSyncedRawFrame( @@ -246,8 +337,8 @@ def load( log_timestamp=idx, ), TimeSyncedAVLidarData( - is_ground_points=np.zeros(len(pc_frame.pc), dtype=bool), - in_range_mask=np.ones(len(pc_frame.pc), dtype=bool), + is_ground_points=self._compute_ground_mask(pc_frame), + in_range_mask=self._in_range_mask(pc_frame.global_pc), ), ) @@ -255,20 +346,25 @@ def __len__(self): return len(self.synced_sensors) -class NuScenesLoader(CachedSequenceLoader): +class NuScenesRawSequenceLoader(CachedSequenceLoader): def __init__( self, sequence_dir: Path, - version: str = "v1.0", + version: str = "v1.0-mini", verbose: bool = False, + point_cloud_range: PointCloudRange | None = DEFAULT_POINT_CLOUD_RANGE, ): super().__init__() self.dataset_dir = Path(sequence_dir) self.verbose = verbose assert self.dataset_dir.is_dir(), f"dataset_dir {sequence_dir} does not exist" - self.nusc = NuScenes(version=version, dataroot=sequence_dir, verbose=verbose) + self.nusc = NuScenesWithInstanceBoxes( + version=version, dataroot=sequence_dir, verbose=verbose + ) self.log_lookup: dict[str, NuscDict] = {e["token"]: e for e in self.nusc.scene} + self.point_cloud_range = point_cloud_range + if self.verbose: print(f"Loaded {len(self)} logs") @@ -278,18 +374,19 @@ def __len__(self): return sorted(self.log_lookup.keys()) - def __getitem__(self, idx: int) -> NuScenesSequence: + def __getitem__(self, idx: int) -> NuScenesRawSequence: seq_id = self.get_sequence_ids()[idx] return self.load_sequence(seq_id) - def _load_sequence_uncached(self, log_id: str) -> NuScenesSequence: + def _load_sequence_uncached(self, log_id: str) -> NuScenesRawSequence: assert log_id in self.log_lookup, f"log_id {log_id} is not one of the {len(self.log_lookup)} available logs" log_info_dict = self.log_lookup[log_id] - return NuScenesSequence( + return NuScenesRawSequence( self.nusc, log_id, log_info_dict, verbose=self.verbose, + point_cloud_range=self.point_cloud_range, ) def cache_folder_name(self) -> str: diff --git a/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_scene_flow.py b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_scene_flow.py new file mode 100644 index 0000000..cdb8adf --- /dev/null +++ b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_scene_flow.py @@ -0,0 +1,299 @@ +from pathlib import Path + +import numpy as np + +from bucketed_scene_flow_eval.datasets.argoverse2.argoverse_scene_flow import ( + ArgoverseSceneFlowSequenceLoader, +) +from bucketed_scene_flow_eval.datasets.nuscenes.nuscenes_raw_data import ( + DEFAULT_POINT_CLOUD_RANGE, + NuscDict, + NuScenesRawSequence, + NuScenesWithInstanceBoxes, + PointCloudRange, +) +from bucketed_scene_flow_eval.datastructures import ( + EgoLidarFlow, + MaskArray, + PointCloud, + SemanticClassId, + SemanticClassIdArray, + SupervisedPointCloudFrame, +
TimeSyncedAVLidarData, + TimeSyncedRawFrame, + TimeSyncedSceneFlowFrame, + VectorArray, +) +from bucketed_scene_flow_eval.interfaces import AbstractAVLidarSequence, CachedSequenceLoader +from bucketed_scene_flow_eval.utils.loaders import load_feather + +CATEGORY_MAP = { + -1: "background", + 0: "animal", + 1: "human.pedestrian.adult", + 2: "human.pedestrian.child", + 3: "human.pedestrian.construction_worker", + 4: "human.pedestrian.personal_mobility", + 5: "human.pedestrian.police_officer", + 6: "human.pedestrian.stroller", + 7: "human.pedestrian.wheelchair", + 8: "movable_object.barrier", + 9: "movable_object.debris", + 10: "movable_object.pushable_pullable", + 11: "movable_object.trafficcone", + 12: "static_object.bicycle_rack", + 13: "vehicle.bicycle", + 14: "vehicle.bus.bendy", + 15: "vehicle.bus.rigid", + 16: "vehicle.car", + 17: "vehicle.construction", + 18: "vehicle.emergency.ambulance", + 19: "vehicle.emergency.police", + 20: "vehicle.motorcycle", + 21: "vehicle.trailer", + 22: "vehicle.truck", +} + +CATEGORY_MAP_INV = {v: k for k, v in CATEGORY_MAP.items()} + + +class NuScenesSceneFlowSequence(NuScenesRawSequence, AbstractAVLidarSequence): + def __init__( + self, + nusc: NuScenesWithInstanceBoxes, + log_id: str, + scene_info: NuscDict, + flow_dir: Path, + with_rgb: bool = False, + with_classes: bool = False, + point_cloud_range: PointCloudRange | None = DEFAULT_POINT_CLOUD_RANGE, + ): + super().__init__( + nusc=nusc, + log_id=log_id, + scene_info=scene_info, + point_cloud_range=point_cloud_range, + ) + self.with_classes = with_classes + self.with_rgb = with_rgb + self.flow_dir = flow_dir + + @staticmethod + def get_class_str(class_id: SemanticClassId) -> str | None: + class_id_int = int(class_id) + if class_id_int not in CATEGORY_MAP: + return None + return CATEGORY_MAP[class_id_int] + + def _make_default_classes(self, pc: PointCloud) -> SemanticClassIdArray: + return np.ones(len(pc.points), dtype=SemanticClassId) * CATEGORY_MAP_INV["background"] + + def _load_flow_feather( + self, idx: int, classes_0: SemanticClassIdArray + ) -> tuple[VectorArray, MaskArray, SemanticClassIdArray]: + assert idx < len(self), f"idx {idx} out of range, len {len(self)} for log {self.log_id}" + # There is no flow information for the last pointcloud in the sequence. + + assert ( + idx != len(self) - 1 + ), f"idx {idx} is the last frame in the sequence, which has no flow data" + assert idx >= 0, f"idx {idx} is out of range" + flow_data_file = self.flow_dir / f"{idx}.feather" + flow_data = load_feather(flow_data_file, verbose=False) + is_valid_arr = flow_data["is_valid"].values + + # The flow data is stored as 3 1D arrays, one for each dimension.
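+ # They are stacked below into an (N, 3) float array of per-point translations in meters, + # matching the Argoverse 2 flow feather column convention (flow_tx_m, flow_ty_m, flow_tz_m).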
+ xs = flow_data["flow_tx_m"].values + ys = flow_data["flow_ty_m"].values + zs = flow_data["flow_tz_m"].values + + flow_0_1 = np.stack([xs, ys, zs], axis=1) + + if self.with_classes: + classes_0 = flow_data["classes_0"].values + + return flow_0_1, is_valid_arr, classes_0 + + def _make_tssf_item( + self, raw_item: TimeSyncedRawFrame, classes_0: SemanticClassIdArray, flow: EgoLidarFlow + ) -> TimeSyncedSceneFlowFrame: + supervised_pc = SupervisedPointCloudFrame( + **vars(raw_item.pc), + full_pc_classes=classes_0, + ) + return TimeSyncedSceneFlowFrame( + pc=supervised_pc, + rgbs=raw_item.rgbs, + log_id=raw_item.log_id, + log_idx=raw_item.log_idx, + log_timestamp=raw_item.log_timestamp, + flow=flow, + ) + + def _load_no_flow( + self, raw_item: TimeSyncedRawFrame, metadata: TimeSyncedAVLidarData + ) -> tuple[TimeSyncedSceneFlowFrame, TimeSyncedAVLidarData]: + classes_0 = self._make_default_classes(raw_item.pc.pc) + flow = EgoLidarFlow.make_no_flow(len(classes_0)) + return self._make_tssf_item(raw_item, classes_0, flow), metadata + + def _load_with_flow( + self, + raw_item: TimeSyncedRawFrame, + metadata: TimeSyncedAVLidarData, + idx: int, + ) -> tuple[TimeSyncedSceneFlowFrame, TimeSyncedAVLidarData]: + ( + ego_flow_with_ground, + is_valid_flow_with_ground_arr, + classes_0_with_ground, + ) = self._load_flow_feather(idx, self._make_default_classes(raw_item.pc.pc)) + flow = EgoLidarFlow(full_flow=ego_flow_with_ground, mask=is_valid_flow_with_ground_arr) + return (self._make_tssf_item(raw_item, classes_0_with_ground, flow), metadata) + + def load( + self, idx: int, relative_to_idx: int, with_flow: bool = True + ) -> tuple[TimeSyncedSceneFlowFrame, TimeSyncedAVLidarData]: + assert idx < len(self), f"idx {idx} out of range, len {len(self)} for log {self.log_id}" + raw_item, metadata = super().load(idx, relative_to_idx) + + if with_flow: + return self._load_with_flow(raw_item, metadata, idx) + else: + return self._load_no_flow(raw_item, metadata) + + def load_frame_list( + self, relative_to_idx: int | None = 0 + ) -> list[tuple[TimeSyncedRawFrame, TimeSyncedAVLidarData]]: + return [ + self.load( + idx=idx, + relative_to_idx=(relative_to_idx if relative_to_idx is not None else idx), + with_flow=(idx != len(self) - 1), + ) + for idx in range(len(self)) + ] + + @staticmethod + def category_ids() -> list[int]: + return NuScenesSceneFlowSequenceLoader.category_ids() + + @staticmethod + def category_id_to_name(category_id: int) -> str: + return NuScenesSceneFlowSequenceLoader.category_id_to_name(category_id) + + @staticmethod + def category_name_to_id(category_name: str) -> int: + return NuScenesSceneFlowSequenceLoader.category_name_to_id(category_name) + + +class NuScenesSceneFlowSequenceLoader(ArgoverseSceneFlowSequenceLoader, CachedSequenceLoader): + def __init__( + self, + raw_data_path: Path | list[Path], + nuscenes_version: str = "v1.0-mini", + flow_data_path: Path | list[Path] | None = None, + use_gt_flow: bool = True, + with_rgb: bool = False, + log_subset: list[str] | None = None, + expected_camera_shape: tuple[int, int, int] = (1550, 2048, 3), + point_cloud_range: PointCloudRange | None = DEFAULT_POINT_CLOUD_RANGE, + ): + CachedSequenceLoader.__init__(self) + self.use_gt_flow = use_gt_flow + self.raw_data_path = self._sanitize_raw_data_path(raw_data_path) + self.with_rgb = with_rgb + self.expected_camera_shape = expected_camera_shape + self.point_cloud_range = point_cloud_range + self.nuscenes = NuScenesWithInstanceBoxes(nuscenes_version, raw_data_path) + self.sequence_id_to_raw_data: dict[str,
NuscDict] = { + e["token"]: e for e in self.nuscenes.scene + } + self.sequence_id_lst: list[str] = sorted(self.sequence_id_to_raw_data.keys()) + self._setup_flow_data(use_gt_flow, flow_data_path) + self._subset_log(log_subset) + + def _load_sequence_uncached(self, sequence_id: str) -> NuScenesSceneFlowSequence: + assert ( + sequence_id in self.sequence_id_to_flow_data + ), f"sequence_id {sequence_id} does not exist" + return NuScenesSceneFlowSequence( + nusc=self.nuscenes, + log_id=sequence_id, + scene_info=self.sequence_id_to_raw_data[sequence_id], + flow_dir=self.sequence_id_to_flow_data[sequence_id], + with_rgb=self.with_rgb, + with_classes=self.use_gt_flow, + point_cloud_range=self.point_cloud_range, + ) + + @staticmethod + def category_ids() -> list[int]: + return list(CATEGORY_MAP.keys()) + + @staticmethod + def category_id_to_name(category_id: int) -> str: + return CATEGORY_MAP[category_id] + + @staticmethod + def category_name_to_id(category_name: str) -> int: + return {v: k for k, v in CATEGORY_MAP.items()}[category_name] + + def cache_folder_name(self) -> str: + return f"nuscenes_raw_data_with_rgb_{self.with_rgb}_use_gt_flow_{self.use_gt_flow}_raw_data_path_{self.raw_data_path}_flow_data_path_{self.flow_data_path}" + + +class NuScenesNoFlowSequence(NuScenesSceneFlowSequence): + def _prep_flow(self, flow_dir: Path): + pass + + def _load_flow_feather( + self, idx: int, classes_0: SemanticClassIdArray + ) -> tuple[VectorArray, MaskArray, SemanticClassIdArray]: + raise NotImplementedError("No flow data available for NuScenesNoFlowSequence") + + def load( + self, idx: int, relative_to_idx: int, with_flow: bool = True + ) -> tuple[TimeSyncedSceneFlowFrame, TimeSyncedAVLidarData]: + return super().load(idx, relative_to_idx, with_flow=False) + + +class NuScenesNoFlowSequenceLoader(NuScenesSceneFlowSequenceLoader): + def __init__( + self, + raw_data_path: Path | list[Path], + nuscenes_version: str = "v1.0-mini", + with_rgb: bool = False, + log_subset: list[str] | None = None, + expected_camera_shape: tuple[int, int, int] = (1550, 2048, 3), + point_cloud_range: PointCloudRange | None = DEFAULT_POINT_CLOUD_RANGE, + ): + CachedSequenceLoader.__init__(self) + self.use_gt_flow = False + self.raw_data_path = raw_data_path + self.with_rgb = with_rgb + self.expected_camera_shape = expected_camera_shape + self.point_cloud_range = point_cloud_range + self.nuscenes = NuScenesWithInstanceBoxes(nuscenes_version, raw_data_path) + self.sequence_id_to_raw_data: dict[str, NuscDict] = { + e["token"]: e for e in self.nuscenes.scene + } + self.sequence_id_lst: list[str] = sorted(self.sequence_id_to_raw_data.keys()) + self._subset_log(log_subset) + + def _load_sequence_uncached(self, sequence_id: str) -> NuScenesNoFlowSequence: + assert ( + sequence_id in self.sequence_id_to_raw_data + ), f"sequence_id {sequence_id} does not exist" + return NuScenesNoFlowSequence( + nusc=self.nuscenes, + log_id=sequence_id, + scene_info=self.sequence_id_to_raw_data[sequence_id], + flow_dir=self.sequence_id_to_raw_data[sequence_id], + with_rgb=self.with_rgb, + with_classes=False, + point_cloud_range=self.point_cloud_range, + ) + + def cache_folder_name(self) -> str: + return f"nuscenes_raw_data_with_rgb_{self.with_rgb}_use_gt_flow_{self.use_gt_flow}_raw_data_path_{self.raw_data_path}_No_flow_data_path" diff --git a/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_utils.py b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_utils.py new file mode 100644 index 0000000..7955fd0 --- /dev/null +++ 
b/bucketed_scene_flow_eval/datasets/nuscenes/nuscenes_utils.py @@ -0,0 +1,204 @@ +"""This file contains utility functions helpful for using the NuScenes dataset.""" + +import numpy as np +from nuscenes.nuscenes import NuScenes +from nuscenes.utils.data_classes import Box +from pyquaternion import Quaternion + +from bucketed_scene_flow_eval.datastructures.se3 import SE3 + +class InstanceBox(Box): + def __init__( + self, + center: list[float], + size: list[float], + orientation: Quaternion, + label: int = np.nan, + score: float = np.nan, + velocity: tuple = (np.nan, np.nan, np.nan), + name: str | None = None, + token: str | None = None, + instance_token: str | None = None, + ): + super().__init__(center, size, orientation, label, score, velocity, name, token) + self.instance_token = instance_token + + def compute_interior_points( + self, points_xyz_m: np.ndarray[float], wlh_factor: float = 1.0 + ) -> tuple[np.ndarray[float], np.ndarray[bool]]: + """Given a query point cloud, filter to points interior to the cuboid, and provide mask. + + Note: comparison is to cuboid vertices in the destination reference frame. + + Args: + points_xyz_m: (N,3) Points to filter. + wlh_factor: Multiply w, l, h by a factor to scale the box. + + Returns: + The interior points and the boolean array indicating which points are interior. + """ + vertices_dst_xyz_m = self.corners(wlh_factor=wlh_factor).T + is_interior = compute_interior_points_mask(points_xyz_m, vertices_dst_xyz_m) + return points_xyz_m[is_interior], is_interior + + def transform(self, pose: SE3) -> "InstanceBox": + self.rotate(Quaternion(matrix=pose.rotation_matrix)) + self.translate(pose.translation) + return self + + @property + def dst_SE3_object(self) -> SE3: + return SE3(self.rotation_matrix, self.center) + + +class NuScenesWithInstanceBoxes(NuScenes): + """A wrapper class of the NuScenes DB object that provides extra functions. + + Most importantly allows boxes to carry over their instance tokens when they are interpolated using `get_boxes` from a sample data token. + """ + + def __init__( + self, + version: str = "v1.0-mini", + dataroot: str = "/data/sets/nuscenes", + verbose: bool = True, + map_resolution: float = 0.1, + ): + super().__init__(version, dataroot, verbose, map_resolution) + + def get_instance_box(self, sample_annotation_token: str) -> InstanceBox: + """ + Instantiates a Box class from a sample annotation record. + :param sample_annotation_token: Unique sample_annotation identifier. + """ + record = self.get("sample_annotation", sample_annotation_token) + return InstanceBox( + record["translation"], + record["size"], + Quaternion(record["rotation"]), + name=record["category_name"], + token=record["token"], + instance_token=record["instance_token"], + ) + + def get_boxes_with_instance_token(self, sample_data_token: str) -> list[InstanceBox]: + """ + This function is a copy of get_boxes() except that it adds the instance tokens to the box objects. + + Instantiates Boxes for all annotations for a particular sample_data record. If the sample_data is a + keyframe, this returns the annotations for that sample. But if the sample_data is an intermediate + sample_data, a linear interpolation is applied to estimate the location of the boxes at the time the + sample_data was captured. + :param sample_data_token: Unique sample_data identifier.
+ """ + + # Retrieve sensor & pose records + sd_record = self.get("sample_data", sample_data_token) + curr_sample_record = self.get("sample", sd_record["sample_token"]) + + if curr_sample_record["prev"] == "" or sd_record["is_key_frame"]: + # If no previous annotations available, or if sample_data is keyframe just return the current ones. + boxes = list(map(self.get_instance_box, curr_sample_record["anns"])) + + else: + prev_sample_record = self.get("sample", curr_sample_record["prev"]) + + curr_ann_recs = [ + self.get("sample_annotation", token) for token in curr_sample_record["anns"] + ] + prev_ann_recs = [ + self.get("sample_annotation", token) for token in prev_sample_record["anns"] + ] + + # Maps instance tokens to prev_ann records + prev_inst_map = {entry["instance_token"]: entry for entry in prev_ann_recs} + + t0 = prev_sample_record["timestamp"] + t1 = curr_sample_record["timestamp"] + t = sd_record["timestamp"] + + # There are rare situations where the timestamps in the DB are off so ensure that t0 < t < t1. + t = max(t0, min(t1, t)) + + boxes = [] + for curr_ann_rec in curr_ann_recs: + + if curr_ann_rec["instance_token"] in prev_inst_map: + # If the annotated instance existed in the previous frame, interpolate center & orientation. + prev_ann_rec = prev_inst_map[curr_ann_rec["instance_token"]] + + # Interpolate center. + center = [ + np.interp(t, [t0, t1], [c0, c1]) + for c0, c1 in zip(prev_ann_rec["translation"], curr_ann_rec["translation"]) + ] + + # Interpolate orientation. + rotation = Quaternion.slerp( + q0=Quaternion(prev_ann_rec["rotation"]), + q1=Quaternion(curr_ann_rec["rotation"]), + amount=(t - t0) / (t1 - t0), + ) + + box = InstanceBox( + center, + curr_ann_rec["size"], + rotation, + name=curr_ann_rec["category_name"], + token=curr_ann_rec["token"], + instance_token=curr_ann_rec["instance_token"], + ) + else: + # If not, simply grab the current annotation. + box = self.get_instance_box(curr_ann_rec["token"]) + + boxes.append(box) + return boxes + +def compute_interior_points_mask(points_xyz: np.ndarray[float], cuboid_vertices: np.ndarray[float]) -> np.ndarray[bool]: + """Compute the interior points mask for the cuboid. Copied from av2.geometry.geometry.compute_interior_points_mask. + + Reference: https://math.stackexchange.com/questions/1472049/check-if-a-point-is-inside-a-rectangular-shaped-area-3d + + 5------4 + |\\ |\\ + | \\ | \\ + 6--\\--7 \\ + \\ \\ \\ \\ + l \\ 1-------0 h + e \\ || \\ || e + n \\|| \\|| i + g \\2------3 g + t width. h + h. t. + + Args: + points_xyz: (N,3) Array representing a point cloud in Cartesian coordinates (x,y,z). + cuboid_vertices: (8,3) Array representing 3D cuboid vertices, ordered as shown above. + + Returns: + (N,) An array of boolean flags indicating whether the points are interior to the cuboid. + """ + # Get three corners of the cuboid vertices. + vertices: np.ndarray[float] = np.stack((cuboid_vertices[6], cuboid_vertices[3], cuboid_vertices[1])) # (3,3) + + # Choose reference vertex. + # vertices and choice of ref_vertex are coupled. + ref_vertex = cuboid_vertices[2] # (3,) + + # Compute orthogonal edges of the cuboid. + uvw = ref_vertex - vertices # (3,3) + + # Compute signed values which are proportional to the distance from the vector. + sim_uvw_points = points_xyz @ uvw.transpose() # (N,3) + sim_uvw_ref = uvw @ ref_vertex # (3,) + + # Only care about the diagonal. 
sim_uvw_vertices: np.ndarray[float] = np.diag(uvw @ vertices.transpose()) # type: ignore # (3,) + + # Check 6 conditions (2 for each of the 3 orthogonal directions). + # Refer to the linked reference for additional information. + constraint_a = np.logical_and(sim_uvw_ref <= sim_uvw_points, sim_uvw_points <= sim_uvw_vertices) + constraint_b = np.logical_and(sim_uvw_ref >= sim_uvw_points, sim_uvw_points >= sim_uvw_vertices) + is_interior: np.ndarray[bool] = np.logical_or(constraint_a, constraint_b).all(axis=1) + return is_interior diff --git a/bucketed_scene_flow_eval/datasets/waymoopen/waymo_supervised_flow.py b/bucketed_scene_flow_eval/datasets/waymoopen/waymo_supervised_flow.py index 5435ef6..0a4805d 100644 --- a/bucketed_scene_flow_eval/datasets/waymoopen/waymo_supervised_flow.py +++ b/bucketed_scene_flow_eval/datasets/waymoopen/waymo_supervised_flow.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Optional +from typing import Optional import numpy as np @@ -7,21 +7,15 @@ SE3, EgoLidarFlow, PointCloud, - PointCloudFrame, PoseInfo, - RGBFrame, RGBFrameLookup, SemanticClassId, SupervisedPointCloudFrame, TimeSyncedAVLidarData, - TimeSyncedBaseAuxilaryData, - TimeSyncedRawFrame, TimeSyncedSceneFlowFrame, ) from bucketed_scene_flow_eval.interfaces import ( AbstractAVLidarSequence, - AbstractSequence, - AbstractSequenceLoader, CachedSequenceLoader, ) from bucketed_scene_flow_eval.utils import load_pickle diff --git a/bucketed_scene_flow_eval/datastructures/dataclasses.py b/bucketed_scene_flow_eval/datastructures/dataclasses.py index 017b27a..bafe0c4 100644 --- a/bucketed_scene_flow_eval/datastructures/dataclasses.py +++ b/bucketed_scene_flow_eval/datastructures/dataclasses.py @@ -17,6 +17,11 @@ @dataclass class PoseInfo: + """Stores pose SE3 objects for a given frame. + + sensor_to_ego: the transformation from the sensor frame to the ego (vehicle) frame + ego_to_global: the transformation from the ego frame to a consistent global frame; for scene flow, this is typically the ego pose at a different timestep in the sequence + """ sensor_to_ego: SE3 ego_to_global: SE3 @@ -90,6 +95,13 @@ def mask_points(self, mask: MaskArray) -> "EgoLidarFlow": @dataclass class PointCloudFrame: + """A Point Cloud Frame. + + full_pc: the point cloud as captured in the sensor's coordinate frame + pose: a PoseInfo object that specifies the transformations from sensor -> ego as well as ego -> global.
+ mask: a validity mask for the point cloud. What counts as valid is determined by the dataloader and may be, e.g., + whether the point is valid for the purpose of computing scene flow, whether the point is ground or not ground, or any other criterion enforced by the dataloader + """ full_pc: PointCloud pose: PoseInfo mask: MaskArray @@ -99,19 +111,25 @@ def pc(self) -> PointCloud: return self.full_pc.mask_points(self.mask) @property - def full_global_pc(self) -> PointCloud: - pose = self.global_pose - return self.full_pc.transform(pose) + def full_ego_pc(self) -> PointCloud: + return self.full_pc.transform(self.pose.sensor_to_ego) @property - def global_pc(self) -> PointCloud: - pose = self.global_pose - return self.pc.transform(pose) + def ego_pc(self) -> PointCloud: + return self.pc.transform(self.pose.sensor_to_ego) @property def global_pose(self) -> SE3: return self.pose.sensor_to_global + @property + def full_global_pc(self) -> PointCloud: + return self.full_pc.transform(self.global_pose) + + @property + def global_pc(self) -> PointCloud: + return self.pc.transform(self.global_pose) + def mask_points(self, mask: MaskArray) -> "PointCloudFrame": assert isinstance(mask, np.ndarray), f"mask must be an ndarray, got {type(mask)}" assert mask.ndim == 1, f"mask must be a 1D array, got {mask.ndim}" diff --git a/data_prep_scripts/nuscenes/create_gt_flow.py b/data_prep_scripts/nuscenes/create_gt_flow.py new file mode 100644 index 0000000..7ed4ef5 --- /dev/null +++ b/data_prep_scripts/nuscenes/create_gt_flow.py @@ -0,0 +1,195 @@ +import os + +os.environ["OMP_NUM_THREADS"] = "1" + +import multiprocessing +from argparse import ArgumentParser +from multiprocessing import Pool, current_process +from pathlib import Path +from typing import Optional + +import numpy as np +import pandas as pd +from tqdm import tqdm + +from bucketed_scene_flow_eval.datastructures.dataclasses import PoseInfo +from bucketed_scene_flow_eval.datastructures.pointcloud import PointCloud +from bucketed_scene_flow_eval.datasets.nuscenes.nuscenes_scene_flow import ( + CATEGORY_MAP_INV, +) +from bucketed_scene_flow_eval.utils.loaders import save_feather +from bucketed_scene_flow_eval.datasets.nuscenes import NuScenesRawSequenceLoader +from bucketed_scene_flow_eval.datasets.nuscenes.nuscenes_utils import InstanceBox + +def compute_sceneflow( + dataset: NuScenesRawSequenceLoader, log_id: str, timestamps: tuple[int, int] +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """Compute sceneflow between the sweeps at the given timestamps. + Args: + dataset: the NuScenes raw sequence loader. + log_id: unique log id.
timestamps: the timestamps of the lidar sweeps to compute flow between + Returns: + flow_0_1: Nx3 array containing flow from timestamp 0 to 1, stored in the sensor frame + classes_0: N array containing the class ids for each point in sweep 0 + valid_0: N boolean array indicating if the returned flow from 0 to 1 is valid (True for valid) + """ + + def compute_flow(sweeps, cuboids: list[list[InstanceBox]], poses: list[PoseInfo]): + ego1_SE3_ego0 = poses[1].ego_to_global.inverse().compose(poses[0].ego_to_global) + + flow_0_1 = np.zeros_like(sweeps[0].points, dtype=np.float32) + + valid_0 = np.ones(len(sweeps[0].points), dtype=bool) + classes_0 = np.ones(len(sweeps[0].points), dtype=np.int8) * CATEGORY_MAP_INV["background"] + + c1_instance_tokens = {c.instance_token: i for i, c in enumerate(cuboids[1])} + + for c0 in cuboids[0]: + c0.wlh += np.array([0.2, 0.2, 0.0]) # The bounding boxes are a little too tight and some points are missed, so expand width and length by 0.2 m + obj_pts, obj_mask = c0.compute_interior_points(sweeps[0].points) + classes_0[obj_mask] = CATEGORY_MAP_INV[c0.name] + + if c0.instance_token in c1_instance_tokens: + c1 = cuboids[1][c1_instance_tokens[c0.instance_token]] + c1_SE3_c0_ego_frame = ego1_SE3_ego0.inverse().compose( + c1.dst_SE3_object.compose(c0.dst_SE3_object.inverse()) + ) + obj_flow = c1_SE3_c0_ego_frame.transform_points(obj_pts) - obj_pts + flow_0_1[obj_mask] = obj_flow.astype(np.float32) + else: + valid_0[obj_mask] = 0 + + # Convert flow from ego -> sensor frame for storage + flow_0_1 = PointCloud(flow_0_1) + flow_0_1_sensor = poses[0].sensor_to_ego.inverse().transform_flow(flow_0_1) + + return flow_0_1_sensor, classes_0, valid_0, ego1_SE3_ego0 + + sequence = dataset._load_sequence_uncached(log_id) + + pc_objects = [sequence.load(ts, 0)[0].pc for ts in timestamps] + + # Sweeps are stored in the sensor frame, so we must transform to the ego frame + sweeps = [pc_obj.full_ego_pc for pc_obj in pc_objects] + + # Ego to map poses, used for computing ego motion + poses = [sequence._load_pose(ts) for ts in timestamps] + + # Cuboids are fetched in global frame initially + lidar_sensor_tokens = [sequence.synced_sensors[ts].lidar_top['token'] for ts in timestamps] + cuboids = [sequence.nusc.get_boxes_with_instance_token(lidar_token) for lidar_token in lidar_sensor_tokens] + # Here we convert cuboids from global frame to ego frame + for cuboid_list, pose in zip(cuboids, poses): + for c in cuboid_list: + c.transform(pose.ego_to_global.inverse()) + + flow_0_1, classes_0, valid_0, _ = compute_flow(sweeps, cuboids, poses) + return flow_0_1, classes_0, valid_0 + + +def process_log( + dataset: NuScenesRawSequenceLoader, log_id: str, output_dir: Path, n: Optional[int] = None +): + """Outputs sceneflow and auxiliary information for each pair of pointclouds in the + dataset. Output files have the format <output_dir>/<log_id>/<timestamp>.feather + Args: + dataset: Sensor dataset to process. + log_id: Log unique id. + output_dir: Output directory.
+ n: the position to use for the progress bar + Returns: + None + """ + timestamps = range(len(dataset._load_sequence_uncached(log_id))) + + iter_bar = zip(timestamps, timestamps[1:]) + if n is not None: + iter_bar = tqdm( + iter_bar, + leave=False, + total=len(timestamps) - 1, + position=n, + desc=f"Log {log_id}", + ) + + for ts0, ts1 in iter_bar: + flow_0_1, classes_0, valid_0 = compute_sceneflow(dataset, log_id, (ts0, ts1)) + df = pd.DataFrame( + { + "flow_tx_m": flow_0_1[:, 0], + "flow_ty_m": flow_0_1[:, 1], + "flow_tz_m": flow_0_1[:, 2], + "is_valid": valid_0, + "classes_0": classes_0, + } + ) + save_feather(output_dir / log_id / f"{ts0}.feather", df, verbose=False) + + +def process_log_wrapper(x, ignore_current_process=False): + if not ignore_current_process: + current = current_process() + pos = current._identity[0] + else: + pos = 1 + process_log(*x, n=pos) + + +def process_logs(data_dir: Path, nusc_ver: str, output_dir: Path, nproc: int): + """Compute sceneflow for all logs in the dataset. Logs are processed in parallel. + Args: + data_dir: NuScenes directory + nusc_ver: the NuScenes version string (e.g. "v1.0-mini") + output_dir: Output directory. + nproc: the number of worker processes to use + """ + + if not data_dir.exists(): + print(f"{data_dir} not found") + return + + split_output_dir = output_dir + split_output_dir.mkdir(exist_ok=True, parents=True) + + dataset = NuScenesRawSequenceLoader(version=nusc_ver, sequence_dir=str(data_dir), point_cloud_range=None) + logs = dataset.get_sequence_ids() + args = sorted([(dataset, log, split_output_dir) for log in logs]) + + print(f"Using {nproc} processes") + if nproc <= 1: + for x in tqdm(args): + process_log_wrapper(x, ignore_current_process=True) + else: + with Pool(processes=nproc) as p: + res = list(tqdm(p.imap_unordered(process_log_wrapper, args), total=len(logs))) + + +if __name__ == "__main__": + multiprocessing.set_start_method("spawn") + parser = ArgumentParser( + prog="create", + description="Create a LiDAR sceneflow dataset from NuScenes", + ) + parser.add_argument( + "--data_dir", + type=str, + help="The top level directory containing the input dataset", + ) + parser.add_argument( + "--nusc_ver", + type=str, + help="The version of nuscenes to use.", + ) + parser.add_argument( + "--output_dir", type=str, help="The location to output the sceneflow files to" + ) + parser.add_argument("--nproc", type=int, default=(multiprocessing.cpu_count() - 1)) + + args = parser.parse_args() + data_root = Path(args.data_dir) + output_dir = Path(args.output_dir) + + process_logs(data_root, args.nusc_ver, output_dir, args.nproc) diff --git a/data_prep_scripts/nuscenes/visualize_nuscenes.py b/data_prep_scripts/nuscenes/visualize_nuscenes.py new file mode 100644 index 0000000..d50e0b6 --- /dev/null +++ b/data_prep_scripts/nuscenes/visualize_nuscenes.py @@ -0,0 +1,171 @@ +from bucketed_scene_flow_eval.datasets.nuscenes import NuScenesRawSequenceLoader +import numpy as np +import open3d as o3d +from pathlib import Path + +from bucketed_scene_flow_eval.datasets.nuscenes.nuscenes_metacategories import BUCKETED_METACATAGORIES +from bucketed_scene_flow_eval.utils.loaders import load_feather + +raw_sequence_loader = NuScenesRawSequenceLoader(version='v1.0-mini', sequence_dir="/efs/nuscenes_mini") +sequence = raw_sequence_loader[0] + +starter_idx = 0 +timestamps = range(len(sequence)) + +def increase_starter_idx(vis): + global starter_idx + starter_idx += 1 + if starter_idx >= len(timestamps) - 1: + starter_idx = 0 + # print("Index: ", starter_idx) + vis.clear_geometries() + draw_frames(vis, reset_view=False) + + +def decrease_starter_idx(vis): + global
starter_idx + starter_idx -= 1 + if starter_idx < 0: + starter_idx = len(timestamps) - 2 + # print("Index: ", starter_idx) + vis.clear_geometries() + draw_frames(vis, reset_view=False) + + +def setup_vis(): + # # make open3d visualizer + vis = o3d.visualization.VisualizerWithKeyCallback() + vis.create_window() + vis.get_render_option().point_size = 1.5 + vis.get_render_option().background_color = (0.1, 0.1, 0.1) + # vis.get_render_option().show_coordinate_frame = True + # set up vector + vis.get_view_control().set_up([0, 0, 1]) + # left arrow decrease starter_idx + vis.register_key_callback(263, decrease_starter_idx) + # right arrow increase starter_idx + vis.register_key_callback(262, increase_starter_idx) + + return vis + +def _colorize_pc(pc: o3d.geometry.PointCloud, color_tuple: tuple[float, float, float]): + pc_color = np.ones_like(pc.points) * np.array(color_tuple) + return o3d.utility.Vector3dVector(pc_color) + +def draw_frames_cuboids(vis, reset_view=False): + ts = timestamps[starter_idx] + pc_object = sequence.load(ts, 0)[0].pc + lidar_pc = pc_object.full_ego_pc + pose = pc_object.pose.ego_to_global + lidar_sensor_token = sequence.synced_sensors[ts].lidar_top['token'] + cuboids = sequence.nusc.get_boxes_with_instance_token(lidar_sensor_token) + # Add base point cloud + pcd = lidar_pc.to_o3d() + pcd.colors = _colorize_pc(pcd, (1, 1, 1)) + vis.add_geometry(pcd, reset_bounding_box=reset_view) + # # Draw the cuboids + cuboids = [c.transform(pose.inverse()) for c in cuboids] + draw_cuboids(vis, cuboids) + +def draw_frames(vis, reset_view=False): + ts = timestamps[starter_idx:starter_idx+2] + color_list = [(0, 0, 1), (0, 1, 0)] + pc_objects = [sequence.load(t, 0)[0].pc for t in ts] + + lidar_pc = [pc_obj.full_ego_pc for pc_obj in pc_objects] + + groundish_points_mask = lidar_pc[0].within_region_mask(-1000, 1000, -1000, 1000, -2, 0.5) + groundish_points = lidar_pc[0].mask_points(groundish_points_mask) + actual_ground_from_groundish_mask = np.zeros((groundish_points.shape[0]), dtype=bool) + + o3d.utility.random.seed(42) + _, inliers0 = groundish_points.to_o3d().segment_plane(distance_threshold=0.2, ransac_n=3, num_iterations=100, probability=1.0) + actual_ground_from_groundish_mask[inliers0] = 1 + + ground_mask = np.zeros_like(groundish_points_mask, dtype=bool) + ground_mask[groundish_points_mask] = actual_ground_from_groundish_mask + + final_rendered_pc = lidar_pc[0].to_o3d() + final_pc_color = np.tile([0, 0, 1], (ground_mask.shape[0], 1)) + final_pc_color[ground_mask] = [0, 1, 1] + final_rendered_pc.colors = o3d.utility.Vector3dVector(final_pc_color) + + vis.add_geometry(final_rendered_pc, reset_bounding_box=reset_view) + + # flow_data = load_feather(Path(f"/efs/nuscenes_mini_sceneflow_feather/{sequence.log_id}/{ts[0]}.feather")) + flow_data = load_feather(Path(f"/efs/nuscenes_mini_nsfp_flow/sequence_len_002/{sequence.log_id}/{ts[0]:010d}.feather")) + # flow_data = load_feather(Path(f"/efs/nuscenes_mini_fast_nsf_flow/sequence_len_002/{sequence.log_id}/{ts[0]:010d}.feather")) + # is_valid_arr = flow_data["is_valid"].values + + # The flow data is stored as 3 1D arrays, one for each dimension. 
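+ # Flow vectors are saved in the sensor frame (see create_gt_flow.py), so they are + # rotated into the ego frame below before being drawn against the ego-frame point cloud.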
+ xs = flow_data["flow_tx_m"].values + ys = flow_data["flow_ty_m"].values + zs = flow_data["flow_tz_m"].values + flow_0_1 = np.stack([xs, ys, zs], axis=1) + # ego_frame_flow = flow_0_1 + ego_frame_flow = pc_objects[0].pose.sensor_to_ego.transform_flow(flow_0_1) + + pc = lidar_pc[0].to_array() + flowed_pc = pc + ego_frame_flow + + line_set = o3d.geometry.LineSet() + line_set_points = np.concatenate([pc, flowed_pc], axis=0) + lines = np.array([[i, i + len(pc)] for i in range(len(pc))]) + line_set.points = o3d.utility.Vector3dVector(line_set_points) + line_set.lines = o3d.utility.Vector2iVector(lines) + draw_color = (1, 0, 0) + line_set.colors = o3d.utility.Vector3dVector( + [draw_color for _ in range(len(lines))] + ) + vis.add_geometry(line_set, reset_bounding_box=reset_view) + + +def draw_cuboids(vis, box_list, reset_view=False): + # Our lines span from points 0 to 1, 1 to 2, 2 to 3, etc... + lines = [ + [0, 1], + [1, 2], + [2, 3], + [0, 3], + [4, 5], + [5, 6], + [6, 7], + [4, 7], + [0, 4], + [1, 5], + [2, 6], + [3, 7], + ] + + # Use the same color for all lines + red = [[1, 0, 0] for _ in range(len(lines))] + green = [[0, 1, 0] for _ in range(len(lines))] + blue = [[0, 0.8, 0.8] for _ in range(len(lines))] + magenta = [[1, 0, 1] for _ in range(len(lines))] + + for bbox in box_list: + corner_box = bbox.corners().T + + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(corner_box) + line_set.lines = o3d.utility.Vector2iVector(lines) + + if bbox.name in BUCKETED_METACATAGORIES["PEDESTRIAN"]: + colors = red + elif bbox.name in BUCKETED_METACATAGORIES["CAR"]: + colors = blue + elif bbox.name in BUCKETED_METACATAGORIES["WHEELED_VRU"]: + colors = green + elif bbox.name in BUCKETED_METACATAGORIES["OTHER_VEHICLES"]: + colors = magenta + else: # Background/static + colors = [[1, 1, 0] for _ in range(len(lines))] + line_set.colors = o3d.utility.Vector3dVector(colors) + + # Display the bounding boxes: + vis.add_geometry(line_set, reset_bounding_box=reset_view) + +if __name__ == "__main__": + vis = setup_vis() + draw_frames(vis, reset_view=True) + vis.run() diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 66c35cd..161625b 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -23,7 +23,7 @@ To generate these supervision labels, use the generation script in `data_prep_sc ### Argoverse 2 NSFP Pseudolabels (New!) 
-We provide the Argoverse 2 NSFP Pseudolabels for the _Sensor_ split in the S3 bucket +We provide the Argoverse 2 NSFP Pseudolabels for the _Sensor_ split in the S3 bucket ``` s3://argoverse/assets/av2/scene_flow/sensor/ @@ -66,3 +66,7 @@ We have also provided a directly downloadable [tiny demo dataset](https://github - `training`: a single frame pair of waymo data - `train_nsfp_flow`: the flow labels for the framepair + +### NuScenes + +TODO \ No newline at end of file diff --git a/scripts/demo_3d.py b/scripts/demo_3d.py index d7b3541..785cd45 100644 --- a/scripts/demo_3d.py +++ b/scripts/demo_3d.py @@ -40,9 +40,9 @@ def visualize_lidar_3d(frame_list: list[TimeSyncedSceneFlowFrame], downscale_rgb if __name__ == "__main__": # Take arguments to specify dataset and root directory parser = argparse.ArgumentParser() - parser.add_argument("--dataset", type=str, default="Argoverse2CausalSceneFlow") - parser.add_argument("--root_dir", type=Path, default="/efs/argoverse2/val") - parser.add_argument("--flow_dir", type=Path, default="/efs/argoverse2/val_sceneflow_feather/") + parser.add_argument("--dataset", type=str, default="NuScenesCausalSceneFlow") + parser.add_argument("--root_dir", type=Path, default="/efs/nuscenes_mini/") + parser.add_argument("--flow_dir", type=Path, default="/efs/nuscenes_mini_sceneflow_feather/") parser.add_argument("--with_rgb", action="store_true") parser.add_argument("--no_ground", action="store_true") parser.add_argument("--sequence_length", type=int, default=2) diff --git a/tests/datasets/nuscenes/nuscenes_tests.py b/tests/datasets/nuscenes/nuscenes_tests.py index e76aa2b..bb6988c 100644 --- a/tests/datasets/nuscenes/nuscenes_tests.py +++ b/tests/datasets/nuscenes/nuscenes_tests.py @@ -1,23 +1,20 @@ from pathlib import Path -from typing import Optional -import numpy as np import pytest -import tqdm -from bucketed_scene_flow_eval.datasets.nuscenes import NuScenesLoader +from bucketed_scene_flow_eval.datasets.nuscenes import NuScenesRawSequenceLoader @pytest.fixture -def nuscenes_loader() -> NuScenesLoader: - return NuScenesLoader( +def nuscenes_loader() -> NuScenesRawSequenceLoader: + return NuScenesRawSequenceLoader( sequence_dir=Path("/tmp/nuscenes"), version="v1.0-mini", verbose=False, ) -def test_nuscenes_loader_basic_load_and_len_check(nuscenes_loader: NuScenesLoader): +def test_nuscenes_loader_basic_load_and_len_check(nuscenes_loader: NuScenesRawSequenceLoader): assert len(nuscenes_loader) > 0, f"no sequences found in {nuscenes_loader}" expected_lens = [236, 239, 236, 236, 233, 223, 239, 231, 231, 228] assert len(nuscenes_loader) == len( @@ -35,11 +32,3 @@ def test_nuscenes_loader_basic_load_and_len_check(nuscenes_loader: NuScenesLoade assert num_loop_iterations == len( expected_lens ), f"expected {len(expected_lens)} loop iterations, got {num_loop_iterations}" - - -def test_nuscenes_first_sequence_investigation(nuscenes_loader: NuScenesLoader): - sequence_id = nuscenes_loader.get_sequence_ids()[0] - nusc_seq = nuscenes_loader.load_sequence(sequence_id) - assert len(nusc_seq) > 0, f"no frames found in {sequence_id}" - - first_frame = nusc_seq.load(0, 0)
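Usage sketch (not part of the diff): the constructor arguments below mirror `NuScenesCausalSceneFlow` in `dataset.py` above; the paths and the `v1.0-mini` split are placeholder assumptions, not values from this PR.

```python
from pathlib import Path

from bucketed_scene_flow_eval.datasets import NuScenesCausalSceneFlow

# Placeholder paths: point these at your nuScenes root and at the feather
# files produced by data_prep_scripts/nuscenes/create_gt_flow.py.
dataset = NuScenesCausalSceneFlow(
    root_dir=Path("/data/nuscenes_mini"),
    nuscenes_version="v1.0-mini",
    flow_data_path=Path("/data/nuscenes_mini_sceneflow_feather"),
    subsequence_length=2,
)

print(len(dataset))  # number of frame pairs in the split
frame_pair = dataset[0]  # a length-2 subsequence of TimeSyncedSceneFlowFrame objects
evaluator = dataset.evaluator()  # BucketedEPEEvaluator with the nuScenes metacategories
```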