-
Notifications
You must be signed in to change notification settings - Fork 524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added updates for HM3D ObjectNav training #818
base: main
Are you sure you want to change the base?
Changes from all commits
c04d8f3
8e11f2d
619845b
6a13829
9638fa7
0c4a22d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Dataset config for HM3D ObjectNav training episodes.
DATASET:
  TYPE: ObjectNav-v1
  SPLIT: train
  # {split} is substituted with SPLIT at load time.
  DATA_PATH: data/datasets/objectnav/hm3d/v1/{split}/{split}.json.gz
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Task config for HM3D ObjectNav with an oracle semantic-category sensor.
ENVIRONMENT:
  MAX_EPISODE_STEPS: 500

SIMULATOR:
  TURN_ANGLE: 30
  TILT_ANGLE: 30
  ACTION_SPACE_CONFIG: "v1"
  AGENT_0:
    SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR']
    HEIGHT: 0.88
    RADIUS: 0.18
  HABITAT_SIM_V0:
    GPU_DEVICE_ID: 0
    ALLOW_SLIDING: False
  # All sensors share the same resolution/FOV and are mounted at agent
  # eye height (0.88 m, matching AGENT_0.HEIGHT).
  SEMANTIC_SENSOR:
    WIDTH: 640
    HEIGHT: 480
    HFOV: 79
    POSITION: [0, 0.88, 0]
  RGB_SENSOR:
    WIDTH: 640
    HEIGHT: 480
    HFOV: 79
    POSITION: [0, 0.88, 0]
  DEPTH_SENSOR:
    WIDTH: 640
    HEIGHT: 480
    HFOV: 79
    MIN_DEPTH: 0.5
    MAX_DEPTH: 5.0
    POSITION: [0, 0.88, 0]
TASK:
  TYPE: ObjectNav-v1
  POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"]

  SENSORS: ['OBJECTGOAL_SENSOR', 'COMPASS_SENSOR', 'GPS_SENSOR']
  GOAL_SENSOR_UUID: objectgoal
  # Oracle semantics used as a model input during training.
  # NOTE(review): this ground-truth data is not available at evaluation
  # time — confirm how it is replaced there.
  SEMANTIC_CATEGORY_SENSOR:
    WIDTH: 640
    HEIGHT: 480
    DATASET: "hm3d"
    CONVERT_TO_RGB: True
    RAW_NAME_TO_CATEGORY_MAPPING: "data/matterport_semantics/matterport_category_mappings.tsv"

  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL', 'SOFT_SPL']

  DISTANCE_TO_GOAL:
    # Measure distance to the goal object's view points, not its center.
    DISTANCE_TO: VIEW_POINTS
  SUCCESS:
    SUCCESS_DISTANCE: 0.1

DATASET:
  TYPE: ObjectNav-v1
  SPLIT: train
  DATA_PATH: "data/datasets/objectnav/hm3d/v1/{split}/{split}.json.gz"
  SCENES_DIR: "data/scene_datasets/"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Task config for HM3D PointNav training (RGB-only agent).
ENVIRONMENT:
  MAX_EPISODE_STEPS: 500
SIMULATOR:
  AGENT_0:
    SENSORS: ['RGB_SENSOR']
  HABITAT_SIM_V0:
    GPU_DEVICE_ID: 0
  RGB_SENSOR:
    WIDTH: 256
    HEIGHT: 256
  # DEPTH_SENSOR resolution is configured even though AGENT_0 only
  # enables RGB — presumably kept for easy switching; confirm.
  DEPTH_SENSOR:
    WIDTH: 256
    HEIGHT: 256
TASK:
  TYPE: Nav-v0
  SUCCESS_DISTANCE: 0.2

  SENSORS: ['POINTGOAL_WITH_GPS_COMPASS_SENSOR']
  POINTGOAL_WITH_GPS_COMPASS_SENSOR:
    GOAL_FORMAT: "POLAR"
    DIMENSIONALITY: 2
  GOAL_SENSOR_UUID: pointgoal_with_gps_compass

  MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL']
  SUCCESS:
    SUCCESS_DISTANCE: 0.2
DATASET:
  TYPE: PointNav-v1
  SPLIT: train
  DATA_PATH: data/datasets/pointnav/hm3d/v1/{split}/{split}.json.gz
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,11 +34,18 @@ | |
from habitat.core.utils import not_none_validator, try_cv2_import | ||
from habitat.sims.habitat_simulator.actions import HabitatSimActions | ||
from habitat.tasks.utils import cartesian_to_polar | ||
from habitat.tasks.nav.semantic_constants import ( | ||
GIBSON_CATEGORY_TO_TASK_CATEGORY_ID, | ||
MP3D_CATEGORY_TO_TASK_CATEGORY_ID, | ||
HM3D_CATEGORY_TO_TASK_CATEGORY_ID, | ||
) | ||
from habitat.utils.geometry_utils import ( | ||
quaternion_from_coeff, | ||
quaternion_rotate_vector, | ||
) | ||
from habitat.utils.visualizations import fog_of_war, maps | ||
from habitat_sim.utils.common import d3_40_colors_rgb | ||
from PIL import Image | ||
|
||
try: | ||
from habitat.sims.habitat_simulator.habitat_simulator import HabitatSim | ||
|
@@ -514,6 +521,118 @@ def get_observation( | |
) | ||
|
||
|
||
@registry.register_sensor(name="SemanticCategorySensor")
class SemanticCategorySensor(Sensor):
    r"""Lists the object categories for each pixel location.

    Maps the simulator's per-pixel instance-id semantic observation to
    per-pixel task category ids (or to an RGB rendering of them when
    ``CONVERT_TO_RGB`` is set). Intended as a model input for training
    with oracle semantics, not just for visualization.

    Args:
        sim: reference to the simulator for calculating task observations.
        config: sensor config; reads DATASET, RAW_NAME_TO_CATEGORY_MAPPING,
            CONVERT_TO_RGB, HEIGHT, and WIDTH.
    """
    cls_uuid: str = "semantic_category"

    def __init__(
        self, sim: Simulator, config: Config, *args: Any, **kwargs: Any
    ):
        self._sim = sim
        # Cache key so the instance-id -> task-id table is rebuilt only
        # when the episode changes.
        self._current_episode_id = None
        self.mapping = None  # unused; kept for interface compatibility
        self.category_to_task_category_id = None
        self.instance_id_to_task_id = None
        self._initialize_category_mappings(config)

        super().__init__(config=config)

    def _get_uuid(self, *args: Any, **kwargs: Any):
        return self.cls_uuid

    def _initialize_category_mappings(self, config):
        """Select the dataset's category->task-id table and build the
        raw-name -> canonical-category mapping from the tsv (if given).

        Raises:
            ValueError: if config.DATASET is not one of the supported sets.
        """
        # Explicit error instead of `assert` (asserts are stripped under -O).
        if config.DATASET not in ("gibson", "mp3d", "hm3d"):
            raise ValueError(f"Unsupported DATASET: {config.DATASET}")
        if config.DATASET == "gibson":
            cat_mapping = GIBSON_CATEGORY_TO_TASK_CATEGORY_ID
        elif config.DATASET == "mp3d":
            cat_mapping = MP3D_CATEGORY_TO_TASK_CATEGORY_ID
        else:
            cat_mapping = HM3D_CATEGORY_TO_TASK_CATEGORY_ID
        self.category_to_task_category_id = cat_mapping
        if config.RAW_NAME_TO_CATEGORY_MAPPING != "":
            # TODO(review): parse the tsv with the csv module instead of
            # manual splitting. The delimiter here is assumed to be a
            # space — confirm against the actual mapping-file format.
            with open(config.RAW_NAME_TO_CATEGORY_MAPPING, "r") as fp:
                lines = fp.readlines()
            rows = [l.strip().split(" ") for l in lines[1:]]  # skip header
            self.raw_to_cat_mapping = {}
            for row in rows:
                if len(row) < 2:
                    continue  # skip blank / malformed lines
                raw_name = row[1]
                cat_name = row[-1]
                # Keep only raw names whose category is a task category.
                if cat_name in cat_mapping:
                    self.raw_to_cat_mapping[raw_name] = cat_name
        else:
            # No mapping file: raw names are already canonical categories.
            self.raw_to_cat_mapping = {k: k for k in cat_mapping.keys()}

    def _get_sensor_type(self, *args: Any, **kwargs: Any):
        return SensorTypes.COLOR

    def _get_observation_space(self, *args: Any, **kwargs: Any):
        if self.config.CONVERT_TO_RGB:
            observation_space = spaces.Box(
                low=0,
                high=255,
                shape=(self.config.HEIGHT, self.config.WIDTH, 3),
                dtype=np.uint8,
            )
        else:
            observation_space = spaces.Box(
                low=np.iinfo(np.int32).min,
                high=np.iinfo(np.int32).max,
                shape=(self.config.HEIGHT, self.config.WIDTH),
                dtype=np.int32,
            )
        return observation_space

    def get_observation(
        self, *args: Any, observations, episode, **kwargs: Any
    ):
        # NOTE(review): observations["semantic"] may be a torch.Tensor in
        # some pipelines, which would force an unnecessary CPU <-> GPU
        # copy here; consider supporting both ndarray and Tensor inputs.
        episode_uniq_id = f"{episode.scene_id} {episode.episode_id}"
        if self._current_episode_id != episode_uniq_id:
            self._current_episode_id = episode_uniq_id
            # Rebuild the instance-id -> task-id table for the new scene.
            scene = self._sim.semantic_annotations()
            # Non-task objects are set to -1.
            self.instance_id_to_task_id = np.full(
                (len(scene.objects),), -1, dtype=np.int64
            )
            for obj in scene.objects:
                if obj is None:
                    continue
                obj_inst_id = int(obj.id.split("_")[-1])
                obj_name = obj.category.name()
                if obj_name in self.raw_to_cat_mapping:
                    obj_name = self.raw_to_cat_mapping[obj_name]
                    obj_task_id = self.category_to_task_category_id[obj_name]
                    self.instance_id_to_task_id[obj_inst_id] = obj_task_id
        # Set out-of-range instance IDs to unknown object 0.
        semantic = np.copy(observations["semantic"])
        semantic[semantic >= self.instance_id_to_task_id.shape[0]] = 0
        # Map from instance id to task id.
        semantic_category = np.take(self.instance_id_to_task_id, semantic)
        if self.config.CONVERT_TO_RGB:
            # NOTE(review): there was discussion about whether RGB
            # conversion belongs here or in visualization utilities; it is
            # kept in the sensor because the RGB form is the model input.
            semantic_category = self.convert_semantic_to_rgb(semantic_category)

        return semantic_category

    def convert_semantic_to_rgb(self, x):
        """Render a per-pixel task-id map to RGB via the d3 40-color palette.

        Invalid pixels (-1) are painted black. Mutates ``x`` in place.
        """
        max_valid_id = max(self.category_to_task_category_id.values())
        assert max_valid_id < 39
        # Map invalid values (-1) to max_valid_id + 1; reuse the mask
        # instead of recomputing x == -1.
        invalid_locs = x == -1
        x[invalid_locs] = max_valid_id + 1
        # Get RGB image.
        semantic_img = Image.new("P", (x.shape[1], x.shape[0]))
        semantic_img.putpalette(d3_40_colors_rgb.flatten())
        semantic_img.putdata((x.flatten() % 40).astype(np.uint8))
        semantic_img = np.array(semantic_img.convert("RGB"))
        # Set pixels for invalid objects to (0, 0, 0).
        semantic_img[invalid_locs, :] = np.array([0, 0, 0])
        return semantic_img
|
||
|
||
@registry.register_measure | ||
class Success(Measure): | ||
r"""Whether or not the agent succeeded at its task | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
# This source code is licensed under the MIT license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
|
||
# Gibson raw category name -> contiguous task category id.
# NOTE(review): id 0 ('chair') overlaps with the value used as the
# "unknown object" fallback elsewhere — confirm this is intended.
GIBSON_CATEGORY_TO_TASK_CATEGORY_ID = {
    'chair': 0,
    'dining table': 1,
    'book': 2,
    'vase': 3,
    'bottle': 4,
    'couch': 5,
    'bed': 6,
    'refrigerator': 7,
    'potted plant': 8,
    'sink': 9,
    'toilet': 10,
    'clock': 11,
    'towel': 12,
    'tv': 13,
    'oven': 14,
    'cup': 15,
    'umbrella': 16,
    'bowl': 17,
    'gym_equipment': 18,
    'bench': 19,
    'clothes': 20,
}
|
||
|
||
# MP3D category name -> contiguous task category id.
# NOTE(review): this ordering differs from the official mpcat40.tsv in the
# Matterport metadata, which may cause confusion — confirm before relying
# on id values matching mpcat40 indices.
MP3D_CATEGORY_TO_TASK_CATEGORY_ID = {
    'chair': 0,
    'table': 1,
    'picture': 2,
    'cabinet': 3,
    'cushion': 4,
    'sofa': 5,
    'bed': 6,
    'chest_of_drawers': 7,
    'plant': 8,
    'sink': 9,
    'toilet': 10,
    'stool': 11,
    'towel': 12,
    'tv_monitor': 13,
    'shower': 14,
    'bathtub': 15,
    'counter': 16,
    'fireplace': 17,
    'gym_equipment': 18,
    'seating': 19,
    'clothes': 20,
}
|
||
|
||
# HM3D goal category name -> contiguous task category id
# (the six ObjectNav goal categories).
HM3D_CATEGORY_TO_TASK_CATEGORY_ID = {
    'chair': 0,
    'bed': 1,
    'plant': 2,
    'toilet': 3,
    'tv_monitor': 4,
    'sofa': 5,
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we use this sensor for training or only for visualization purposes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This sensor is used for training models with GT semantic goal inputs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@srama2512, yes. But that GT data isn't available during evaluation, and from this PR it isn't clear how it will be replaced.