From e94550798d8b5572af7b527c0b2587c1321f798e Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 21 Apr 2022 16:39:54 -0700 Subject: [PATCH 01/53] functional training with basic/process-testing motion noise --- ithor_arm/ithor_arm_environment.py | 53 +++++++- ...rd_no_pu_w_noise_0_4_agent_motion_noise.py | 125 ++++++++++++++++++ 2 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py diff --git a/ithor_arm/ithor_arm_environment.py b/ithor_arm/ithor_arm_environment.py index 0fdfcc4..9d626bb 100644 --- a/ithor_arm/ithor_arm_environment.py +++ b/ithor_arm/ithor_arm_environment.py @@ -7,6 +7,7 @@ from typing import Tuple, Dict, List, Set, Union, Any, Optional import ai2thor.server +from cv2 import USAGE_DEFAULT import numpy as np from ai2thor.controller import Controller from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV @@ -56,6 +57,7 @@ def __init__( verbose: bool = False, env_args=None, ) -> None: + """Initializer. # Parameters @@ -84,6 +86,9 @@ def __init__( simplify_physics : Whether or not to simplify physics when applicable. Currently this only simplies object interactions when opening drawers (when simplified, objects within a drawer do not slide around on their own when the drawer is opened or closed, instead they are effectively glued down). + *_noise_meta_dist_params : [mean, variance] defines the normal distribution over which the actual noise parameters + for a motion noise distribution will be drawn. Distributions for noise in motion will be re-rolled every scene + reset with new bias and variance values drawn from these meta-distributions. """ self._start_player_screen_width = player_screen_width @@ -112,6 +117,19 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics + if 'motion_noise' in env_args.keys(): + self.ahead_noise_meta_dist_params = env_args['motion_noise']['ahead_noise_meta_dist_params'] + self.lateral_noise_meta_dist_params = env_args['motion_noise']['lateral_noise_meta_dist_params'] + self.turning_noise_meta_dist_params = env_args['motion_noise']['turning_noise_meta_dist_params'] + else: + self.ahead_noise_meta_dist_params = {'bias_dist': [0,0], 'variance_dist': [0,0]} + self.lateral_noise_meta_dist_params = {'bias_dist': [0,0], 'variance_dist': [0,0]} + self.turning_noise_meta_dist_params = {'bias_dist': [0,0], 'variance_dist': [0,0]} + + self.ahead_noise_params = [0,0] + self.lateral_noise_params = [0,0] + self.turning_noise_params = [0,0] + self.start(None) # self.check_controller_version() @@ -124,6 +142,28 @@ def __init__( self.env_args["quality"] = self._quality # self.memory_frames = [] + def generate_motion_noise_params(self,meta_dist): + bias = np.random.normal(*meta_dist['bias_dist']) + variance = np.abs(np.random.normal(*meta_dist['variance_dist'])) + return [bias,variance] + + + def reset_agent_motion_noise_models(self): + self.ahead_noise_params = self.generate_motion_noise_params(self.ahead_noise_meta_dist_params) + self.lateral_noise_params = self.generate_motion_noise_params(self.lateral_noise_meta_dist_params) + self.turning_noise_params = self.generate_motion_noise_params(self.turning_noise_meta_dist_params) + + + def check_controller_version(self): + if MANIPULATHOR_COMMIT_ID is not None: + assert ( + MANIPULATHOR_COMMIT_ID in self.controller._build.url + ), "Build number is not right, {} vs {}, use pip3 install -e 
git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format( + self.controller._build.url, + MANIPULATHOR_COMMIT_ID, + MANIPULATHOR_COMMIT_ID, + ) + # def check_controller_version(self): # if MANIPULATHOR_COMMIT_ID is not None: # assert ( @@ -222,6 +262,9 @@ def reset( self.list_of_actions_so_far = [] + self.reset_agent_motion_noise_models() + + def randomize_agent_location( self, seed: int = None, partial_position: Optional[Dict[str, float]] = None ) -> Dict: @@ -391,15 +434,19 @@ def step( action_dict = {**action_dict, **copy_aditions} if action in [MOVE_AHEAD]: action_dict["action"] = "MoveAgent" - action_dict["ahead"] = 0.2 + action_dict["ahead"] = 0.2 + np.random.normal(*self.ahead_noise_params) + action_dict["right"] = np.random.normal(*self.lateral_noise_params) elif action in [ROTATE_RIGHT]: action_dict["action"] = "RotateAgent" - action_dict["degrees"] = 45 + action_dict["degrees"] = 45 + np.random.normal(*self.turning_noise_params) elif action in [ROTATE_LEFT]: action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -45 + action_dict["degrees"] = -45 + np.random.normal(*self.turning_noise_params) + + # print(action_dict) + # ForkedPdb().set_trace() elif "MoveArm" in action: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py new file mode 100644 index 0000000..0c66ff0 --- /dev/null +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py @@ -0,0 +1,125 @@ +import platform +from typing import Dict + +import gym +import torch +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from torch import nn + +from ithor_arm.bring_object_sensors import CategorySampleSensor, NoisyObjectMask, NoGripperRGBSensorThor, CategoryFeatureSampleSensor +from ithor_arm.bring_object_task_samplers import DiverseBringObjectTaskSampler +from ithor_arm.bring_object_tasks import WPickUPExploreBringObjectTask, ExploreWiseRewardTask +from ithor_arm.ithor_arm_constants import ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS +from ithor_arm.ithor_arm_sensors import ( + InitialAgentArmToObjectSensor, + InitialObjectToGoalSensor, + PickedUpObjSensor, + DepthSensorThor, RelativeAgentArmToObjectSensor, RelativeObjectToGoalSensor, +) +from ithor_arm.ithor_arm_viz import MaskImageVisualizer +from ithor_arm.near_deadline_sensors import PointNavEmulatorSensor +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_ddppo import BringObjectMixInPPOConfig +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_simplegru import BringObjectMixInSimpleGRUConfig +from manipulathor_baselines.bring_object_baselines.experiments.ithor.bring_object_ithor_base import BringObjectiThorBaseConfig +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator +from manipulathor_baselines.bring_object_baselines.models.query_obj_w_gt_mask_rgb_model import SmallBringObjectWQueryObjGtMaskRGBDModel +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator + + +class PointNavNewModelAndHandWAgentLocationAndMotionNoise( + BringObjectiThorBaseConfig, + BringObjectMixInPPOConfig, + BringObjectMixInSimpleGRUConfig, +): + 
"""An Object Navigation experiment configuration in iThor with RGB + input.""" + NOISE_LEVEL = 0 + AGENT_LOCATION_NOISE = 0.4 #? + distance_thr = 1.5 # is this a good number? + source_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='source', distance_thr=distance_thr) + destination_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='destination', distance_thr=distance_thr) + SENSORS = [ + RGBSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + DepthSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_normalization=True, + uuid="depth_lowres", + ), + PickedUpObjSensor(), + CategorySampleSensor(type='source'), + CategorySampleSensor(type='destination'), + CategoryFeatureSampleSensor(type='source'), + CategoryFeatureSampleSensor(type='destination'), + source_mask_sensor, + destination_mask_sensor, + PointNavEmulatorSensor(type='source', mask_sensor=source_mask_sensor, noise=AGENT_LOCATION_NOISE), + PointNavEmulatorSensor(type='destination', mask_sensor=destination_mask_sensor, noise=AGENT_LOCATION_NOISE), + # TempRealArmpointNav(uuid='point_nav_emul',type='source'), + # TempRealArmpointNav(uuid='point_nav_emul', type='destination'), + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = DiverseBringObjectTaskSampler + # TASK_TYPE = TestPointNavExploreWiseRewardTask + TASK_TYPE = ExploreWiseRewardTask + + NUM_PROCESSES = 20 + + OBJECT_TYPES = TRAIN_OBJECTS + TEST_OBJECTS + + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoise, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulatorSensor): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoise, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulatorSensor): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 # is this too big? + self.REWARD_CONFIG['object_found'] = 1 # is this too big? 
+ self.ENV_ARGS['visibilityDistance'] = self.distance_thr + self.ENV_ARGS['motion_noise'] = dict() + self.ENV_ARGS['motion_noise']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.05]} + self.ENV_ARGS['motion_noise']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} + self.ENV_ARGS['motion_noise']['turning_noise_meta_dist_params'] = {'bias_dist': [0,5], 'variance_dist': [0,5]} + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return RGBDModelWPointNavEmulator( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ From 396f7e545824bcc87ec7778c0f81baa275a11a74 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 27 Apr 2022 17:27:19 -0700 Subject: [PATCH 02/53] agent motion noise models as imported classes, habitat noise, and agent belief in nominal motion --- ithor_arm/ithor_arm_environment.py | 102 +++++++++---- ithor_arm/ithor_arm_noise_models.py | 113 +++++++++++++++ ithor_arm/near_deadline_sensors.py | 122 ++++++++++++++++ ...rd_no_pu_w_noise_0_4_agent_motion_noise.py | 22 ++- ..._w_noise_0_4_agent_motion_noise_noreset.py | 135 ++++++++++++++++++ 5 files changed, 458 insertions(+), 36 deletions(-) create mode 100644 ithor_arm/ithor_arm_noise_models.py create mode 100644 manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py diff --git a/ithor_arm/ithor_arm_environment.py b/ithor_arm/ithor_arm_environment.py index 9d626bb..fd9085b 100644 --- a/ithor_arm/ithor_arm_environment.py +++ b/ithor_arm/ithor_arm_environment.py @@ -13,6 +13,9 @@ from allenact_plugins.ithor_plugin.ithor_constants import VISIBILITY_DISTANCE, FOV from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment +from ithor_arm.ithor_arm_noise_models import NoiseInMotionHabitatFlavor, NoiseInMotionSimple1DNormal +from ithor_arm.arm_calculation_utils import convert_world_to_agent_coordinate + from ithor_arm.ithor_arm_constants import ( ADITIONAL_ARM_ARGS, ARM_MIN_HEIGHT, @@ -117,18 +120,19 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics - if 'motion_noise' in env_args.keys(): - self.ahead_noise_meta_dist_params = env_args['motion_noise']['ahead_noise_meta_dist_params'] - self.lateral_noise_meta_dist_params = env_args['motion_noise']['lateral_noise_meta_dist_params'] - self.turning_noise_meta_dist_params = env_args['motion_noise']['turning_noise_meta_dist_params'] + if 'motion_noise_type' in env_args.keys(): + if env_args['motion_noise_type'] == 'habitat': + self.noise_model = NoiseInMotionHabitatFlavor(**env_args['motion_noise_args']) + elif env_args['motion_noise_type'] == 'simple1d': + self.noise_model = NoiseInMotionSimple1DNormal(**env_args['motion_noise_args']) + else: + print('Unrecognized motion noise model type') + ForkedPdb().set_trace() else: - self.ahead_noise_meta_dist_params = {'bias_dist': [0,0], 'variance_dist': [0,0]} - self.lateral_noise_meta_dist_params = {'bias_dist': [0,0], 'variance_dist': [0,0]} - self.turning_noise_meta_dist_params = {'bias_dist': [0,0], 'variance_dist': [0,0]} + self.noise_model = NoiseInMotionSimple1DNormal() # without args initializes as trivial/zero-noise + + self.ahead_nominal = 0.2 - self.ahead_noise_params = [0,0] - self.lateral_noise_params 
= [0,0] - self.turning_noise_params = [0,0] self.start(None) # self.check_controller_version() @@ -142,18 +146,6 @@ def __init__( self.env_args["quality"] = self._quality # self.memory_frames = [] - def generate_motion_noise_params(self,meta_dist): - bias = np.random.normal(*meta_dist['bias_dist']) - variance = np.abs(np.random.normal(*meta_dist['variance_dist'])) - return [bias,variance] - - - def reset_agent_motion_noise_models(self): - self.ahead_noise_params = self.generate_motion_noise_params(self.ahead_noise_meta_dist_params) - self.lateral_noise_params = self.generate_motion_noise_params(self.lateral_noise_meta_dist_params) - self.turning_noise_params = self.generate_motion_noise_params(self.turning_noise_meta_dist_params) - - def check_controller_version(self): if MANIPULATHOR_COMMIT_ID is not None: assert ( @@ -262,7 +254,8 @@ def reset( self.list_of_actions_so_far = [] - self.reset_agent_motion_noise_models() + self.noise_model.reset_noise_model() + self.nominal_agent_location = self.get_agent_location() def randomize_agent_location( @@ -334,6 +327,34 @@ def get_absolute_hand_state(self): xyz_dict = self.correct_nan_inf(xyz_dict, "absolute hand") return dict(position=xyz_dict, rotation={"x": 0, "y": 0, "z": 0}) + + def update_nominal_location(self, action): + curr_loc = self.nominal_agent_location + new_loc = copy.deepcopy(curr_loc) + # location = { + # "x": metadata["agent"]["position"]["x"], + # "y": metadata["agent"]["position"]["y"], + # "z": metadata["agent"]["position"]["z"], + # "rotation": metadata["agent"]["rotation"]["y"], + # "horizon": metadata["agent"]["cameraHorizon"], + # "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")), + # } + if action is ROTATE_LEFT: + new_loc["rotation"] += -45 # add some wrapping here + elif action is ROTATE_RIGHT: + new_loc["rotation"] += 45 + elif action is MOVE_AHEAD: + # Assumption: rotation is relative to the +x axis in the xz plane + new_loc["x"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) + new_loc["z"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) + else: + print('unknown action') + ForkedPdb().set_trace() + + # ForkedPdb().set_trace() + + self.nominal_agent_location = new_loc + def get_pickupable_objects(self): event = self.controller.last_event @@ -431,22 +452,45 @@ def step( copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) + # RH: order matters, nominal action happens last action_dict = {**action_dict, **copy_aditions} if action in [MOVE_AHEAD]: + noise = self.noise_model.get_ahead_drift(self.ahead_nominal) + + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "MoveAgent" - action_dict["ahead"] = 0.2 + np.random.normal(*self.ahead_noise_params) - action_dict["right"] = np.random.normal(*self.lateral_noise_params) + action_dict["ahead"] = self.ahead_nominal + noise[0] + action_dict["right"] = noise[1] elif action in [ROTATE_RIGHT]: + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = 45 + np.random.normal(*self.turning_noise_params) + action_dict["degrees"] = 45 + noise[2] elif action in [ROTATE_LEFT]: + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + 
action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -45 + np.random.normal(*self.turning_noise_params) + action_dict["degrees"] = -45 + noise[2] # print(action_dict) # ForkedPdb().set_trace() + self.update_nominal_location(action) elif "MoveArm" in action: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) @@ -480,10 +524,10 @@ def step( k: v for (k, v) in base_position.items() if k in ["x", "y", "z"] } - - sr = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) + # print(action_dict) + # ForkedPdb().set_trace() if action in SET_OF_ALL_AGENT_ACTIONS: self.update_memory() diff --git a/ithor_arm/ithor_arm_noise_models.py b/ithor_arm/ithor_arm_noise_models.py new file mode 100644 index 0000000..e25b510 --- /dev/null +++ b/ithor_arm/ithor_arm_noise_models.py @@ -0,0 +1,113 @@ +"""Utility classes and functions for noise models used by the platforms.""" +from typing import Tuple, Dict, List, Set, Union, Any, Optional + +from utils.noise_from_habitat import MotionNoiseModel, _TruncatedMultivariateGaussian +import numpy as np + +class NoiseInMotionHabitatFlavor: + # Sample usage in __init__ for a config file: + # self.ENV_ARGS['motion_noise_type'] = 'habitat' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0] + # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1 + + def __init__(self, + multiplier_means: List[float] = [1,1,1,1,1,1], + multiplier_sigmas: List[float] = [0,0,0,0,0,0,0], + effect_scale: float = 1, + ) -> None: + + self.multiplier_means = multiplier_means + self.multiplier_sigmas = multiplier_sigmas + self.effect_scale = effect_scale + + self.ahead = None + self.rotate = None + + self.reset_noise_model() + + def generate_linear_submodel(self,m1,m2,m3,s1,s2,s3): + return MotionNoiseModel( + _TruncatedMultivariateGaussian([m1 * 0.074, m2 * 0.036], [s1 * 0.019, s2 * 0.033]), + _TruncatedMultivariateGaussian([m3 * 0.189], [s3 * 0.038])) + + def generate_rotational_submodel(self,m1,m2,m3,s1,s2,s3): + return MotionNoiseModel( + _TruncatedMultivariateGaussian([m1 * 0.002, m2 * 0.003], [s1 * 0.0, s2 * 0.002]), + _TruncatedMultivariateGaussian([m3 * 0.219], [s3 * 0.019])) + + def generate_model_multipliers(self): + return [np.random.normal(self.multiplier_means[i],self.multiplier_sigmas[i]) + for i in range(len(self.multiplier_means))] + + def reset_noise_model(self): + model_multipliers = self.generate_model_multipliers() + + self.ahead = self.generate_linear_submodel(*model_multipliers) + self.rotate = self.generate_rotational_submodel(*model_multipliers) + + def get_ahead_drift(self,*_): + # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + rotation_drift = self.effect_scale * self.ahead.rotation.sample() + linear_drift = self.effect_scale * self.ahead.linear.sample() + + return [linear_drift[0], linear_drift[1], rotation_drift[0] * 180 / np.pi] + + def get_rotate_drift(self): + # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + rotation_drift = self.effect_scale * self.rotate.rotation.sample() + linear_drift = self.effect_scale * self.rotate.linear.sample() + + return [linear_drift[0], linear_drift[1], rotation_drift[0] * 180 / np.pi] + + +class NoiseInMotionSimple1DNormal: + # Sample usage in __init__ for a config 
file: + # self.ENV_ARGS['motion_noise_type'] = 'simple1d' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.10]} + # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.04]} + # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,10], 'variance_dist': [0,10]} + + def __init__(self, + ahead_noise_meta_dist_params: Dict = {'bias_dist': [0,0], 'variance_dist': [0,0]}, + lateral_noise_meta_dist_params: Dict= {'bias_dist': [0,0], 'variance_dist': [0,0]}, + turning_noise_meta_dist_params: Dict = {'bias_dist': [0,0], 'variance_dist': [0,0]}, + effect_scale: float = 1, + ) -> None: + + self.ahead_noise_meta_dist_params = ahead_noise_meta_dist_params + self.lateral_noise_meta_dist_params = lateral_noise_meta_dist_params + self.turning_noise_meta_dist_params = turning_noise_meta_dist_params + self.effect_scale = effect_scale + + self.ahead_noise_params = [0,0] + self.lateral_noise_params = [0,0] + self.turning_noise_params = [0,0] + + self.reset_noise_model() + + def generate_motion_noise_params(self,meta_dist): + bias = np.random.normal(*meta_dist['bias_dist']) + variance = np.abs(np.random.normal(*meta_dist['variance_dist'])) + return [bias,variance] + + def reset_noise_model(self): + self.ahead_noise_params = self.generate_motion_noise_params(self.ahead_noise_meta_dist_params) + self.lateral_noise_params = self.generate_motion_noise_params(self.lateral_noise_meta_dist_params) + self.turning_noise_params = self.generate_motion_noise_params(self.turning_noise_meta_dist_params) + + def get_ahead_drift(self, nominal_ahead): + # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + linear_drift = self.effect_scale * np.random.normal(*self.ahead_noise_params) + side_drift = self.effect_scale * np.random.normal(*self.lateral_noise_params) + rotation_drift = np.arctan2(side_drift,linear_drift + nominal_ahead) * 180 / np.pi + + return [linear_drift, side_drift, rotation_drift ] + + def get_rotate_drift(self): + # for this model, rotating incurs no positional drift. 
Returns [0,0,deg] + rotation_drift = self.effect_scale * np.random.normal(*self.turning_noise_params) + + return [0, 0, rotation_drift] diff --git a/ithor_arm/near_deadline_sensors.py b/ithor_arm/near_deadline_sensors.py index cc15e4e..f5ed013 100644 --- a/ithor_arm/near_deadline_sensors.py +++ b/ithor_arm/near_deadline_sensors.py @@ -39,6 +39,8 @@ from utils.noise_from_habitat import ControllerNoiseModel, MotionNoiseModel, _TruncatedMultivariateGaussian from utils.noise_in_motion_util import NoiseInMotion, squeeze_bool_mask, tensor_from_dict +KINECT_FOV_W, KINECT_FOV_H = 59, 90 + class FancyNoisyObjectMaskWLabels(Sensor): def __init__(self, type: str,noise, height, width, uuid: str = "object_mask", distance_thr: float = -1, **kwargs: Any): @@ -368,7 +370,127 @@ def average_so_far(self, camera_xyz, camera_rotation, arm_state): # self.noise = something # return super(PointNavEmulatorSensorwScheduler, self).get_observation(env, task, *args, **kwargs) +<<<<<<< HEAD +======= +class PointNavEmulSensorDeadReckoning(Sensor): + + def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): + observation_space = gym.spaces.Box( + low=0, high=1, shape=(1,), dtype=np.float32 + ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) + self.type = type + self.mask_sensor = mask_sensor + self.depth_sensor = depth_sensor + uuid = '{}_{}'.format(uuid, type) + + self.min_xyz = np.zeros((3)) + + self.dummy_answer = torch.zeros(3) + self.dummy_answer[:] = 4 # is this good enough? + self.device = torch.device("cpu") + super().__init__(**prepare_locals_for_super(locals())) + + def get_agent_belief_state(self,env): + #TODO all these values need to be checked + fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think + agent_state = env.nominal_agent_location + # ForkedPdb().set_trace() + camera_horizon = 45 + camera_xyz = np.array([agent_state[k] for k in ['x','y','z']]) + camera_rotation = (agent_state['rotation'] + 90) % 360 + return fov, camera_horizon, camera_xyz, camera_rotation + + def get_observation( + self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any + ) -> Any: + #TODO remove + if self.type == 'destination': + return self.dummy_answer + mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times? + depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) + + if task.num_steps_taken() == 0: + self.pointnav_history_aggr = [] + self.real_prev_location = None + self.belief_prev_location = None + + + fov, camera_horizon, camera_xyz, camera_rotation = self.get_agent_belief_state(env) + arm_agent_coord = env.get_current_arm_state() + + if mask.sum() != 0: + + midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) + self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + + # arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice + # if arm_mask.sum() == 0: #Do we want to do some approximations or no? 
+ # arm_world_coord = None #TODO approax for this + # else: + # arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) + # # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera + # # result = distance_in_agent_coord.cpu() + # if arm_mask.sum() != 0 or mask.sum() != 0: + + # import cv2 + # cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1]) + # cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255) + # cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255) + # # ForkedPdb().set_trace() #TODO remove + result = self.history_aggregation(camera_xyz, camera_rotation, arm_agent_coord, task.num_steps_taken()) + return result + + def history_aggregation(self, camera_xyz, camera_rotation, arm_agent_coord, current_step_number): + if len(self.pointnav_history_aggr) == 0: + return self.dummy_answer + else: + weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_weights = sum(weights) + total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_sum = sum(total_sum) + midpoint = total_sum / total_weights + agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) + midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) + midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + + # arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0)) + # arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) + arm_state_agent_coord = torch.Tensor([arm_agent_coord[k] for k in ['x','y','z']]) + # ForkedPdb().set_trace() + + distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord + return distance_in_agent_coord.cpu() + + +def calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame): + with torch.no_grad(): + camera_xyz = ( + torch.from_numpy(camera_xyz - min_xyz).float().to(device) + ) + + depth_frame = torch.from_numpy(depth_frame).to(device) + depth_frame[depth_frame == -1] = np.NaN + world_space_point_cloud = depth_frame_to_world_space_xyz( + depth_frame=depth_frame, + camera_world_xyz=camera_xyz, + rotation=camera_rotation, + horizon=camera_horizon, + fov=fov, + ) + return world_space_point_cloud +>>>>>>> 222cada (agent motion noise models as imported classes, habitat noise, and agent belief in nominal motion) + +def get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device): + mask = squeeze_bool_mask(mask) + depth_frame_masked = depth_frame_original.copy() + depth_frame_masked[~mask] = -1 + world_space_point_cloud = calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame_masked) + valid_points = (world_space_point_cloud == world_space_point_cloud).sum(dim=-1) == 3 + point_in_world = world_space_point_cloud[valid_points] + midpoint_agent_coord = point_in_world.mean(dim=0) + return midpoint_agent_coord class 
PredictionObjectMask(Sensor): diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py index 0c66ff0..13e0407 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise.py @@ -26,7 +26,7 @@ from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator -class PointNavNewModelAndHandWAgentLocationAndMotionNoise( +class PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise( BringObjectiThorBaseConfig, BringObjectMixInPPOConfig, BringObjectMixInSimpleGRUConfig, @@ -76,7 +76,7 @@ class PointNavNewModelAndHandWAgentLocationAndMotionNoise( def train_task_sampler_args(self, **kwargs): - sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoise, self).train_task_sampler_args(**kwargs) + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise, self).train_task_sampler_args(**kwargs) if platform.system() == "Darwin": pass else: @@ -87,7 +87,7 @@ def train_task_sampler_args(self, **kwargs): return sampler_args def test_task_sampler_args(self, **kwargs): - sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoise, self).test_task_sampler_args(**kwargs) + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseMoreNoise, self).test_task_sampler_args(**kwargs) if platform.system() == "Darwin": pass else: @@ -103,10 +103,18 @@ def __init__(self): self.REWARD_CONFIG['exploration_reward'] = 0.1 # is this too big? self.REWARD_CONFIG['object_found'] = 1 # is this too big? 
self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['motion_noise'] = dict() - self.ENV_ARGS['motion_noise']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.05]} - self.ENV_ARGS['motion_noise']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} - self.ENV_ARGS['motion_noise']['turning_noise_meta_dist_params'] = {'bias_dist': [0,5], 'variance_dist': [0,5]} + + # self.ENV_ARGS['motion_noise_type'] = 'habitat' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0] + # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1 + + # self.ENV_ARGS['motion_noise_type'] = 'simple1d' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.10]} + # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.04], 'variance_dist': [0,0.04]} + # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,10], 'variance_dist': [0,10]} @classmethod diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py new file mode 100644 index 0000000..3c31d28 --- /dev/null +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py @@ -0,0 +1,135 @@ +import platform +from typing import Dict + +import gym +import torch +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from torch import nn + +from ithor_arm.bring_object_sensors import CategorySampleSensor, NoisyObjectMask, NoGripperRGBSensorThor, CategoryFeatureSampleSensor +from ithor_arm.bring_object_task_samplers import DiverseBringObjectTaskSampler +from ithor_arm.bring_object_tasks import WPickUPExploreBringObjectTask, ExploreWiseRewardTask +from ithor_arm.ithor_arm_constants import ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS +from ithor_arm.ithor_arm_sensors import ( + InitialAgentArmToObjectSensor, + InitialObjectToGoalSensor, + PickedUpObjSensor, + DepthSensorThor, RelativeAgentArmToObjectSensor, RelativeObjectToGoalSensor, +) +from ithor_arm.ithor_arm_viz import MaskImageVisualizer +from ithor_arm.near_deadline_sensors import PointNavEmulatorSensor, PointNavEmulSensorDeadReckoning +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_ddppo import BringObjectMixInPPOConfig +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_mixin_simplegru import BringObjectMixInSimpleGRUConfig +from manipulathor_baselines.bring_object_baselines.experiments.ithor.bring_object_ithor_base import BringObjectiThorBaseConfig +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator +from manipulathor_baselines.bring_object_baselines.models.query_obj_w_gt_mask_rgb_model import SmallBringObjectWQueryObjGtMaskRGBDModel +from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator + + +class PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoReset( + BringObjectiThorBaseConfig, + 
BringObjectMixInPPOConfig, + BringObjectMixInSimpleGRUConfig, +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + NOISE_LEVEL = 0 + AGENT_LOCATION_NOISE = 0.4 #? + distance_thr = 1.5 # is this a good number? + source_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='source', distance_thr=distance_thr) + destination_mask_sensor = NoisyObjectMask(height=BringObjectiThorBaseConfig.SCREEN_SIZE, width=BringObjectiThorBaseConfig.SCREEN_SIZE,noise=NOISE_LEVEL, type='destination', distance_thr=distance_thr) + depth_sensor = DepthSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_normalization=True, + uuid="depth_lowres", + ) + + SENSORS = [ + RGBSensorThor( + height=BringObjectiThorBaseConfig.SCREEN_SIZE, + width=BringObjectiThorBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + depth_sensor, + PickedUpObjSensor(), + CategorySampleSensor(type='source'), + CategorySampleSensor(type='destination'), + CategoryFeatureSampleSensor(type='source'), + CategoryFeatureSampleSensor(type='destination'), + source_mask_sensor, + destination_mask_sensor, + PointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor, noise=AGENT_LOCATION_NOISE, depth_sensor=depth_sensor), + PointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor, noise=AGENT_LOCATION_NOISE, depth_sensor=depth_sensor), + # TempRealArmpointNav(uuid='point_nav_emul',type='source'), + # TempRealArmpointNav(uuid='point_nav_emul', type='destination'), + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = DiverseBringObjectTaskSampler + # TASK_TYPE = TestPointNavExploreWiseRewardTask + TASK_TYPE = ExploreWiseRewardTask + + NUM_PROCESSES = 20 + + OBJECT_TYPES = TRAIN_OBJECTS + TEST_OBJECTS + + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoReset, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulSensorDeadReckoning): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoReset, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for pointnav_emul_sensor in sampler_args['sensors']: + if isinstance(pointnav_emul_sensor, PointNavEmulSensorDeadReckoning): + pointnav_emul_sensor.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 # is this too big? + self.REWARD_CONFIG['object_found'] = 1 # is this too big? 
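+ # Sketch of how these ENV_ARGS are consumed (see ManipulaTHOREnvironment.__init__):
+ #     if env_args['motion_noise_type'] == 'habitat':
+ #         noise_model = NoiseInMotionHabitatFlavor(**env_args['motion_noise_args'])
+ #     elif env_args['motion_noise_type'] == 'simple1d':
+ #         noise_model = NoiseInMotionSimple1DNormal(**env_args['motion_noise_args'])
+ # so the keys chosen below must match the selected model's constructor, and omitting
+ # 'motion_noise_type' falls back to an effectively zero-noise model.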
+ self.ENV_ARGS['visibilityDistance'] = self.distance_thr + + # self.ENV_ARGS['motion_noise_type'] = 'habitat' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0] + # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1 + + self.ENV_ARGS['motion_noise_type'] = 'simple1d' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.5]} + self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} + self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,5], 'variance_dist': [0,5]} + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return RGBDModelWPointNavEmulator( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ From 90ac8e9bfd3dde387ced179baf66a3414dc764ec Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Mon, 2 May 2022 14:08:02 -0700 Subject: [PATCH 03/53] fix cherry-pick --- ithor_arm/near_deadline_sensors.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ithor_arm/near_deadline_sensors.py b/ithor_arm/near_deadline_sensors.py index f5ed013..ff4a9b5 100644 --- a/ithor_arm/near_deadline_sensors.py +++ b/ithor_arm/near_deadline_sensors.py @@ -370,8 +370,6 @@ def average_so_far(self, camera_xyz, camera_rotation, arm_state): # self.noise = something # return super(PointNavEmulatorSensorwScheduler, self).get_observation(env, task, *args, **kwargs) -<<<<<<< HEAD -======= class PointNavEmulSensorDeadReckoning(Sensor): def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): @@ -480,7 +478,6 @@ def calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov=fov, ) return world_space_point_cloud ->>>>>>> 222cada (agent motion noise models as imported classes, habitat noise, and agent belief in nominal motion) def get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device): mask = squeeze_bool_mask(mask) From 9862a8f6924360ec8f42d5be6de178ccacf64d2b Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 28 Apr 2022 15:53:51 -0700 Subject: [PATCH 04/53] debugged belief state with traj viz and included knowledge of failed actions --- ithor_arm/ithor_arm_environment.py | 46 ++++++++--------- ithor_arm/near_deadline_sensors.py | 49 ++++++++++++++++--- ..._w_noise_0_4_agent_motion_noise_noreset.py | 20 ++++---- 3 files changed, 76 insertions(+), 39 deletions(-) diff --git a/ithor_arm/ithor_arm_environment.py b/ithor_arm/ithor_arm_environment.py index fd9085b..f311153 100644 --- a/ithor_arm/ithor_arm_environment.py +++ b/ithor_arm/ithor_arm_environment.py @@ -1,5 +1,6 @@ """A wrapper for engaging with the ManipulaTHOR environment.""" +from ast import For import copy import math import typing @@ -328,9 +329,7 @@ def get_absolute_hand_state(self): return dict(position=xyz_dict, rotation={"x": 0, "y": 0, "z": 0}) - def update_nominal_location(self, action): - curr_loc = self.nominal_agent_location - new_loc = copy.deepcopy(curr_loc) + def update_nominal_location(self, action_dict): 
# location = { # "x": metadata["agent"]["position"]["x"], # "y": metadata["agent"]["position"]["y"], @@ -339,20 +338,25 @@ def update_nominal_location(self, action): # "horizon": metadata["agent"]["cameraHorizon"], # "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")), # } - if action is ROTATE_LEFT: - new_loc["rotation"] += -45 # add some wrapping here - elif action is ROTATE_RIGHT: - new_loc["rotation"] += 45 - elif action is MOVE_AHEAD: - # Assumption: rotation is relative to the +x axis in the xz plane - new_loc["x"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) - new_loc["z"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) - else: - print('unknown action') - ForkedPdb().set_trace() - - # ForkedPdb().set_trace() + curr_loc = self.nominal_agent_location + new_loc = copy.deepcopy(curr_loc) + + if action_dict['action'] is 'RotateLeft': + new_loc["rotation"] = (new_loc["rotation"] - 45) % 360 + elif action_dict['action'] is 'RotateRight': + new_loc["rotation"] = (new_loc["rotation"] + 45) % 360 + elif action_dict['action'] is 'MoveAhead': + new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) + new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) + elif action_dict['action'] is 'TeleportFull': + new_loc["x"] = action_dict['x'] + new_loc["y"] = action_dict['y'] + new_loc["z"] = action_dict['z'] + new_loc["rotation"] = action_dict['rotation']['y'] + new_loc["horizon"] = action_dict['horizon'] + # print('Agent teleported, nominal location reset to teleport destination') + self.nominal_agent_location = new_loc def get_pickupable_objects(self): @@ -417,6 +421,7 @@ def step( ) -> ai2thor.server.Event: """Take a step in the ai2thor environment.""" action = typing.cast(str, action_dict["action"]) + original_action_dict = copy.deepcopy(action_dict) skip_render = "renderImage" in action_dict and not action_dict["renderImage"] last_frame: Optional[np.ndarray] = None @@ -488,10 +493,6 @@ def step( action_dict["action"] = "RotateAgent" action_dict["degrees"] = -45 + noise[2] - # print(action_dict) - # ForkedPdb().set_trace() - self.update_nominal_location(action) - elif "MoveArm" in action: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) action_dict = {**action_dict, **copy_aditions} @@ -526,8 +527,9 @@ def step( sr = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) - # print(action_dict) - # ForkedPdb().set_trace() + + if sr.metadata["lastActionSuccess"]: + self.update_nominal_location(original_action_dict) if action in SET_OF_ALL_AGENT_ACTIONS: self.update_memory() diff --git a/ithor_arm/near_deadline_sensors.py b/ithor_arm/near_deadline_sensors.py index ff4a9b5..b7b912d 100644 --- a/ithor_arm/near_deadline_sensors.py +++ b/ithor_arm/near_deadline_sensors.py @@ -1,4 +1,5 @@ """Utility classes and functions for sensory inputs used by the models.""" +from ast import For import copy import datetime import math @@ -391,12 +392,22 @@ def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str def get_agent_belief_state(self,env): #TODO all these values need to be checked fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? 
it should be smaller one I think - agent_state = env.nominal_agent_location + belief_agent_state = env.nominal_agent_location + real_agent_state = env.get_agent_location() # ForkedPdb().set_trace() - camera_horizon = 45 - camera_xyz = np.array([agent_state[k] for k in ['x','y','z']]) - camera_rotation = (agent_state['rotation'] + 90) % 360 - return fov, camera_horizon, camera_xyz, camera_rotation + + belief_camera_horizon = 45 + belief_camera_xyz = np.array([belief_agent_state[k] for k in ['x','y','z']]) + belief_camera_rotation = (belief_agent_state['rotation'] + 90) % 360 + + real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']]) + # ForkedPdb().set_trace() + + self.belief_prev_location.append(belief_camera_xyz) + self.real_prev_location.append(real_camera_xyz) + # ForkedPdb().set_trace() + + return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation def get_observation( self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any @@ -409,8 +420,8 @@ def get_observation( if task.num_steps_taken() == 0: self.pointnav_history_aggr = [] - self.real_prev_location = None - self.belief_prev_location = None + self.real_prev_location = [] + self.belief_prev_location = [] fov, camera_horizon, camera_xyz, camera_rotation = self.get_agent_belief_state(env) @@ -435,6 +446,30 @@ def get_observation( # cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255) # cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255) # # ForkedPdb().set_trace() #TODO remove + + # if len(self.belief_prev_location) > 190: + # import matplotlib + # matplotlib.use('TkAgg') + # import matplotlib.pyplot as plt + # fig = plt.figure() + # ax = fig.add_subplot(projection='3d') + # def draw_points(locations, color): + # xs = [x[0] for x in locations] + # ys = [x[1] for x in locations] + # zs = [x[2] for x in locations] + # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) + + # def draw(locations, color): + # xs = [x['camera_xyz'][0] for x in locations] + # ys = [x['camera_xyz'][1] for x in locations] + # zs = [x['camera_xyz'][2] for x in locations] + # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) + + # draw_points(self.real_prev_location, 'g'); draw_points(self.belief_prev_location, 'b'); plt.show() + # ForkedPdb().set_trace() + + + result = self.history_aggregation(camera_xyz, camera_rotation, arm_agent_coord, task.num_steps_taken()) return result diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py index 3c31d28..b065155 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py @@ -106,17 +106,17 @@ def __init__(self): self.REWARD_CONFIG['object_found'] = 1 # is this too big? 
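# Sketch of what the habitat-flavor settings below control (see NoiseInMotionHabitatFlavor
# in ithor_arm/ithor_arm_noise_models.py):
#   - each multiplier_i is drawn as Normal(multiplier_means[i], multiplier_sigmas[i]) at
#     every noise-model reset; only the first six sigmas are used, since six multipliers
#     are drawn;
#   - the multipliers rescale the habitat-calibrated truncated-Gaussian drift submodels,
#     and effect_scale multiplies every sampled [ahead, lateral, rotation] drift.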
self.ENV_ARGS['visibilityDistance'] = self.distance_thr - # self.ENV_ARGS['motion_noise_type'] = 'habitat' - # self.ENV_ARGS['motion_noise_args'] = dict() - # self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] - # self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0,0,0,0,0,0,0] - # self.ENV_ARGS['motion_noise_args']['effect_scale'] = 1 - - self.ENV_ARGS['motion_noise_type'] = 'simple1d' + self.ENV_ARGS['motion_noise_type'] = 'habitat' self.ENV_ARGS['motion_noise_args'] = dict() - self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.5]} - self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} - self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,5], 'variance_dist': [0,5]} + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 + + # self.ENV_ARGS['motion_noise_type'] = 'simple1d' + # self.ENV_ARGS['motion_noise_args'] = dict() + # self.ENV_ARGS['motion_noise_args']['ahead_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} + # self.ENV_ARGS['motion_noise_args']['lateral_noise_meta_dist_params'] = {'bias_dist': [0,0.02], 'variance_dist': [0,0.02]} + # self.ENV_ARGS['motion_noise_args']['turning_noise_meta_dist_params'] = {'bias_dist': [0,2], 'variance_dist': [0,2]} @classmethod From 9ff402b169f4534aa295be78f1d885ff86113330 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Mon, 2 May 2022 10:17:33 -0700 Subject: [PATCH 05/53] cleanup --- ithor_arm/ithor_arm_environment.py | 5 +++-- ithor_arm/ithor_arm_noise_models.py | 9 ++++++++- ithor_arm/near_deadline_sensors.py | 18 ------------------ ...u_w_noise_0_4_agent_motion_noise_noreset.py | 8 ++++---- 4 files changed, 15 insertions(+), 25 deletions(-) diff --git a/ithor_arm/ithor_arm_environment.py b/ithor_arm/ithor_arm_environment.py index f311153..3a0c493 100644 --- a/ithor_arm/ithor_arm_environment.py +++ b/ithor_arm/ithor_arm_environment.py @@ -130,7 +130,7 @@ def __init__( print('Unrecognized motion noise model type') ForkedPdb().set_trace() else: - self.noise_model = NoiseInMotionSimple1DNormal() # without args initializes as trivial/zero-noise + self.noise_model = NoiseInMotionHabitatFlavor(effect_scale=0.0) # un-noise model self.ahead_nominal = 0.2 @@ -355,7 +355,6 @@ def update_nominal_location(self, action_dict): new_loc["z"] = action_dict['z'] new_loc["rotation"] = action_dict['rotation']['y'] new_loc["horizon"] = action_dict['horizon'] - # print('Agent teleported, nominal location reset to teleport destination') self.nominal_agent_location = new_loc @@ -528,6 +527,8 @@ def step( sr = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) + # RH: Nominal location only updates for successful actions. 
Note that that drift + # action might succeed even if the "main" action fails if sr.metadata["lastActionSuccess"]: self.update_nominal_location(original_action_dict) diff --git a/ithor_arm/ithor_arm_noise_models.py b/ithor_arm/ithor_arm_noise_models.py index e25b510..a07f044 100644 --- a/ithor_arm/ithor_arm_noise_models.py +++ b/ithor_arm/ithor_arm_noise_models.py @@ -49,13 +49,20 @@ def reset_noise_model(self): def get_ahead_drift(self,*_): # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + + if self.effect_scale == 0: #speedup for trivial case + return [0,0,0] + rotation_drift = self.effect_scale * self.ahead.rotation.sample() linear_drift = self.effect_scale * self.ahead.linear.sample() - return [linear_drift[0], linear_drift[1], rotation_drift[0] * 180 / np.pi] def get_rotate_drift(self): # returns [ahead change, left/right change, rot change] from an ahead command in [m,m,deg] + + if self.effect_scale == 0: #speedup for trivial case + return [0,0,0] + rotation_drift = self.effect_scale * self.rotate.rotation.sample() linear_drift = self.effect_scale * self.rotate.linear.sample() diff --git a/ithor_arm/near_deadline_sensors.py b/ithor_arm/near_deadline_sensors.py index b7b912d..68b4c35 100644 --- a/ithor_arm/near_deadline_sensors.py +++ b/ithor_arm/near_deadline_sensors.py @@ -394,18 +394,15 @@ def get_agent_belief_state(self,env): fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think belief_agent_state = env.nominal_agent_location real_agent_state = env.get_agent_location() - # ForkedPdb().set_trace() belief_camera_horizon = 45 belief_camera_xyz = np.array([belief_agent_state[k] for k in ['x','y','z']]) belief_camera_rotation = (belief_agent_state['rotation'] + 90) % 360 real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']]) - # ForkedPdb().set_trace() self.belief_prev_location.append(belief_camera_xyz) self.real_prev_location.append(real_camera_xyz) - # ForkedPdb().set_trace() return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation @@ -432,21 +429,6 @@ def get_observation( midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) - # arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice - # if arm_mask.sum() == 0: #Do we want to do some approximations or no? 
- # arm_world_coord = None #TODO approax for this - # else: - # arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) - # # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera - # # result = distance_in_agent_coord.cpu() - # if arm_mask.sum() != 0 or mask.sum() != 0: - - # import cv2 - # cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1]) - # cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255) - # cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255) - # # ForkedPdb().set_trace() #TODO remove - # if len(self.belief_prev_location) > 190: # import matplotlib # matplotlib.use('TkAgg') diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py index b065155..b27fcf0 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py @@ -26,7 +26,7 @@ from manipulathor_baselines.bring_object_baselines.models.pointnav_emulator_model import RGBDModelWPointNavEmulator -class PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoReset( +class PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged( BringObjectiThorBaseConfig, BringObjectMixInPPOConfig, BringObjectMixInSimpleGRUConfig, @@ -78,7 +78,7 @@ class PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoReset( def train_task_sampler_args(self, **kwargs): - sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoReset, self).train_task_sampler_args(**kwargs) + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged, self).train_task_sampler_args(**kwargs) if platform.system() == "Darwin": pass else: @@ -89,7 +89,7 @@ def train_task_sampler_args(self, **kwargs): return sampler_args def test_task_sampler_args(self, **kwargs): - sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoReset, self).test_task_sampler_args(**kwargs) + sampler_args = super(PointNavNewModelAndHandWAgentLocationAndMotionNoiseNoResetDebugged, self).test_task_sampler_args(**kwargs) if platform.system() == "Darwin": pass else: @@ -110,7 +110,7 @@ def __init__(self): self.ENV_ARGS['motion_noise_args'] = dict() self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] - self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .5 # self.ENV_ARGS['motion_noise_type'] = 'simple1d' # self.ENV_ARGS['motion_noise_args'] = dict() From 23f90dc2ee87d45cff25b3c1e6bb5713a117c5db Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Mon, 2 May 2022 16:34:17 -0700 Subject: [PATCH 06/53] procthor branch env updates --- dev_requirements.txt | 186 ++++++++++++++++++ ..._w_noise_0_4_agent_motion_noise_noreset.py | 2 +- 2 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 dev_requirements.txt diff --git a/dev_requirements.txt b/dev_requirements.txt new file mode 100644 index 0000000..d8a701d --- /dev/null +++ 
b/dev_requirements.txt @@ -0,0 +1,186 @@ +absl-py==0.12.0 +addict==2.4.0 +# ai2thor==0+dc0f9ecd8672dc2d62651f567ff95c63f3542332 +aiohttp==3.8.1 +aiosignal==1.2.0 +allenact==0.4.0 +allenact-plugins==0.4.0 +antlr4-python3-runtime==4.8 +appdirs==1.4.4 +appnope==0.1.2 +argon2-cffi==20.1.0 +astunparse==1.6.3 +async-generator==1.10 +async-timeout==4.0.2 +asynctest==0.13.0 +attrs==20.3.0 +aws-requests-auth==0.4.3 +backcall==0.2.0 +black==21.4b2 +bleach==3.3.0 +botocore==1.20.67 +cachetools==4.2.2 +certifi==2020.12.5 +cffi==1.14.5 +chardet==4.0.0 +charset-normalizer==2.0.12 +click==7.1.2 +clip @ git+https://github.com/openai/CLIP.git@04f4dc2ca1ed0acc9893bd1a3b526a7e02c4bb10 +cloudpickle==1.6.0 +cmake==3.18.4.post1 +colour==0.1.5 +cycler==0.10.0 +Cython==0.29.23 +dataclasses>0.5 +datasets==1.18.4 +decorator==4.4.2 +defusedxml==0.7.1 +# detectron2 @ git+https://github.com/facebookresearch/detectron2.git@d285dea0a58675217b061f48182dc031d2e3b677 +dill==0.3.4 +entrypoints==0.3 +filelock==3.0.12 +Flask==1.1.2 +flatbuffers==1.12 +frozenlist==1.2.0 +fsspec==2022.1.0 +ftfy==6.0.3 +future==0.18.2 +fvcore==0.1.5.post20210609 +gast==0.3.3 +gin==0.1.6 +gin-config==0.4.0 +google-auth==1.30.0 +google-auth-oauthlib==0.4.4 +google-pasta==0.2.0 +grpcio==1.32.0 +gym==0.17.3 +h5py==2.10.0 +huggingface-hub==0.4.0 +hydra-core==1.1.0 +idna==2.10 +idna-ssl==1.1.0 +imageio==2.9.0 +imageio-ffmpeg==0.4.3 +importlib-metadata==4.0.1 +importlib-resources==5.1.4 +iopath==0.1.8 +ipykernel==5.5.4 +ipython==7.16.1 +ipython-genutils==0.2.0 +ipywidgets==7.6.3 +itsdangerous==1.1.0 +jedi==0.17.0 +Jinja2==2.11.3 +jmespath==0.10.0 +joblib==1.1.0 +jsonschema==3.2.0 +jupyter==1.0.0 +jupyter-client==6.1.12 +jupyter-console==6.4.0 +jupyter-core==4.7.1 +jupyterlab-pygments==0.1.2 +jupyterlab-widgets==1.0.0 +Keras-Preprocessing==1.1.2 +kiwisolver==1.3.1 +llvmlite==0.36.0 +Markdown==3.3.4 +MarkupSafe==1.1.1 +matplotlib==3.3.4 +mistune==0.8.4 +moviepy==1.0.3 +msgpack==1.0.2 +multidict==5.2.0 +multiprocess==0.70.12.2 +mypy-extensions==0.4.3 +nbclient==0.5.3 +nbconvert==6.0.7 +nbformat==5.1.3 +nest-asyncio==1.5.1 +networkx==2.5.1 +notebook==6.3.0 +numba==0.53.1 +numpy==1.19.5 +numpy-quaternion==2021.4.5.14.42.35 +oauthlib==3.1.0 +omegaconf==2.1.0 +open3d==0.13.0 +opencv-contrib-python==4.5.5.64 +opt-einsum==3.3.0 +packaging==20.9 +pandas==1.1.5 +pandocfilters==1.4.3 +parso==0.8.2 +pathspec==0.8.1 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==8.2.0 +portalocker==2.3.0 +proglog==0.1.9 +progressbar2==3.53.1 +prometheus-client==0.10.1 +prompt-toolkit==3.0.18 +protobuf==3.15.8 +ptyprocess==0.7.0 +pyarrow==6.0.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycocotools==2.0.2 +pycparser==2.20 +pydot==1.4.2 +pyglet==1.5.0 +Pygments==2.9.0 +pyparsing==2.4.7 +pyquaternion==0.9.9 +pyrsistent==0.17.3 +python-dateutil==2.8.1 +python-utils==2.5.6 +python-xlib==0.29 +pytz==2021.3 +PyWavelets==1.1.1 +PyYAML==5.4.1 +pyzmq==22.0.3 +qtconsole==5.1.0 +QtPy==1.9.0 +regex==2021.4.4 +requests==2.25.1 +requests-oauthlib==1.3.0 +responses==0.17.0 +rsa==4.7.2 +scikit-image==0.17.2 +scikit-learn==0.24.2 +scipy==1.5.4 +Send2Trash==1.5.0 +setproctitle==1.2.2 +Shapely==1.8.1.post1 +six==1.15.0 +tabulate==0.8.9 +tensorboard==2.5.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorboardX==2.2 +tensorflow==2.4.1 +tensorflow-estimator==2.4.0 +termcolor==1.1.0 +terminado==0.9.4 +testpath==0.4.4 +threadpoolctl==3.0.0 +tifffile==2020.9.3 +toml==0.10.2 +torch==1.8.1 +torchaudio==0.8.1 +torchvision==0.9.1 +tornado==6.1 +tqdm==4.63.0 +traitlets==4.3.3 +typed-ast==1.4.3 
+typing-extensions==3.7.4.3 +urllib3==1.26.4 +wcwidth==0.2.5 +webencodings==0.5.1 +Werkzeug==1.0.1 +widgetsnbextension==3.5.1 +wrapt==1.12.1 +xxhash==3.0.0 +yacs==0.1.8 +yarl==1.7.2 +zipp==3.4.1 diff --git a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py index b27fcf0..dfb36f2 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/ithor/pointnav_complex_reward_no_pu_w_noise_0_4_agent_motion_noise_noreset.py @@ -9,7 +9,7 @@ from ithor_arm.bring_object_sensors import CategorySampleSensor, NoisyObjectMask, NoGripperRGBSensorThor, CategoryFeatureSampleSensor from ithor_arm.bring_object_task_samplers import DiverseBringObjectTaskSampler from ithor_arm.bring_object_tasks import WPickUPExploreBringObjectTask, ExploreWiseRewardTask -from ithor_arm.ithor_arm_constants import ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS +from ithor_arm.ithor_arm_constants import MANIPULATHOR_ENV_ARGS, TRAIN_OBJECTS, TEST_OBJECTS from ithor_arm.ithor_arm_sensors import ( InitialAgentArmToObjectSensor, InitialObjectToGoalSensor, From 97eb7e2b46bf8b82732cf0da2654f5c8ae77e094 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 3 May 2022 10:59:33 -0700 Subject: [PATCH 07/53] add noise to stretch env --- ithor_arm/ithor_arm_environment.py | 9 +- .../stretch_ithor_arm_environment.py | 113 ++++++++++++++++-- 2 files changed, 108 insertions(+), 14 deletions(-) diff --git a/ithor_arm/ithor_arm_environment.py b/ithor_arm/ithor_arm_environment.py index 3a0c493..099be65 100644 --- a/ithor_arm/ithor_arm_environment.py +++ b/ithor_arm/ithor_arm_environment.py @@ -133,6 +133,7 @@ def __init__( self.noise_model = NoiseInMotionHabitatFlavor(effect_scale=0.0) # un-noise model self.ahead_nominal = 0.2 + self.rotate_nominal = 45 self.start(None) @@ -343,9 +344,9 @@ def update_nominal_location(self, action_dict): new_loc = copy.deepcopy(curr_loc) if action_dict['action'] is 'RotateLeft': - new_loc["rotation"] = (new_loc["rotation"] - 45) % 360 + new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal) % 360 elif action_dict['action'] is 'RotateRight': - new_loc["rotation"] = (new_loc["rotation"] + 45) % 360 + new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal) % 360 elif action_dict['action'] is 'MoveAhead': new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) @@ -479,7 +480,7 @@ def step( action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = 45 + noise[2] + action_dict["degrees"] = noise[2] + self.rotate_nominal elif action in [ROTATE_LEFT]: noise = self.noise_model.get_rotate_drift() @@ -490,7 +491,7 @@ def step( action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -45 + noise[2] + action_dict["degrees"] = noise[2] - self.rotate_nominal elif "MoveArm" in action: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index 8ff6bfa..19b2d2e 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py 
@@ -14,6 +14,9 @@ from torch.distributions.utils import lazy_property from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment +from ithor_arm.ithor_arm_noise_models import NoiseInMotionHabitatFlavor, NoiseInMotionSimple1DNormal +from ithor_arm.arm_calculation_utils import convert_world_to_agent_coordinate + from utils.stretch_utils.stretch_constants import ( ADITIONAL_ARM_ARGS, PICKUP, DONE, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, MOVE_BACK, MOVE_ARM_HEIGHT_P, MOVE_ARM_HEIGHT_M, MOVE_ARM_Z_P, MOVE_ARM_Z_M, MOVE_WRIST_P, MOVE_WRIST_M, MOVE_WRIST_P_SMALL, MOVE_WRIST_M_SMALL, ROTATE_LEFT_SMALL, ROTATE_RIGHT_SMALL, @@ -111,6 +114,20 @@ def __init__( self._always_return_visible_range = False self.simplify_physics = simplify_physics + if 'motion_noise_type' in env_args.keys(): + if env_args['motion_noise_type'] == 'habitat': + self.noise_model = NoiseInMotionHabitatFlavor(**env_args['motion_noise_args']) + elif env_args['motion_noise_type'] == 'simple1d': + self.noise_model = NoiseInMotionSimple1DNormal(**env_args['motion_noise_args']) + else: + print('Unrecognized motion noise model type') + ForkedPdb().set_trace() + else: + self.noise_model = NoiseInMotionHabitatFlavor(effect_scale=0.0) # un-noise model + + self.ahead_nominal = AGENT_MOVEMENT_CONSTANT + self.rotate_nominal = AGENT_ROTATION_DEG + # self.start(None) @@ -212,6 +229,9 @@ def reset( self.list_of_actions_so_far = [] + self.noise_model.reset_noise_model() + self.nominal_agent_location = self.get_agent_location() + def check_controller_version(self, controller=None): controller_to_check = controller @@ -288,6 +308,35 @@ def get_pickupable_objects(self): object_list = event.metadata["arm"]["pickupableObjects"] return object_list + + def update_nominal_location(self, action_dict): + # location = { + # "x": metadata["agent"]["position"]["x"], + # "y": metadata["agent"]["position"]["y"], + # "z": metadata["agent"]["position"]["z"], + # "rotation": metadata["agent"]["rotation"]["y"], + # "horizon": metadata["agent"]["cameraHorizon"], + # "standing": metadata.get("isStanding", metadata["agent"].get("isStanding")), + # } + + curr_loc = self.nominal_agent_location + new_loc = copy.deepcopy(curr_loc) + + if action_dict['action'] is 'RotateLeft': + new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal) % 360 + elif action_dict['action'] is 'RotateRight': + new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal) % 360 + elif action_dict['action'] is 'MoveAhead': + new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) + new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) + elif action_dict['action'] is 'TeleportFull': + new_loc["x"] = action_dict['x'] + new_loc["y"] = action_dict['y'] + new_loc["z"] = action_dict['z'] + new_loc["rotation"] = action_dict['rotation']['y'] + new_loc["horizon"] = action_dict['horizon'] + + self.nominal_agent_location = new_loc def step( self, action_dict: Dict[str, Union[str, int, float]] @@ -295,6 +344,7 @@ def step( """Take a step in the ai2thor environment.""" action = typing.cast(str, action_dict["action"]) + original_action_dict = copy.deepcopy(action_dict) skip_render = "renderImage" in action_dict and not action_dict["renderImage"] last_frame: Optional[np.ndarray] = None @@ -325,23 +375,61 @@ def step( action_dict = { 'action': 'Pass' } # we have to change the last action success if the pik up fails, we do that in the task now - - elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_LEFT, ROTATE_RIGHT]: + + elif action in [MOVE_AHEAD, 
MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT]: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) + + # RH: order matters, nominal action happens last action_dict = {**action_dict, **copy_aditions} - if action == MOVE_AHEAD: + if action in [MOVE_AHEAD]: + noise = self.noise_model.get_ahead_drift(self.ahead_nominal) + + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "MoveAgent" - action_dict["ahead"] = AGENT_MOVEMENT_CONSTANT - elif action == MOVE_BACK: + action_dict["ahead"] = noise[0] + self.ahead_nominal + action_dict["right"] = noise[1] + + elif action in [MOVE_BACK]: + noise = self.noise_model.get_ahead_drift(self.ahead_nominal) + # TODO revisit - make sense to sample the same for ahead and back? + # inclination is effect matters less than currently unmodeled hysteresis effects + + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + action_dict = dict() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] - self.ahead_nominal + action_dict["right"] = noise[1] + + + elif action in [ROTATE_RIGHT]: + noise = self.noise_model.get_rotate_drift() action_dict["action"] = "MoveAgent" - action_dict["ahead"] = -AGENT_MOVEMENT_CONSTANT - elif action == ROTATE_RIGHT: + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = AGENT_ROTATION_DEG + action_dict["degrees"] = noise[2] + self.rotate_nominal + + elif action in [ROTATE_LEFT]: + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) - elif action == ROTATE_LEFT: + action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -AGENT_ROTATION_DEG + action_dict["degrees"] = noise[2] - self.rotate_nominal + elif action in [MOVE_ARM_HEIGHT_P,MOVE_ARM_HEIGHT_M,MOVE_ARM_Z_P,MOVE_ARM_Z_M,]: base_position = get_relative_stretch_current_arm_state(self.controller) change_value = ARM_MOVE_CONSTANT @@ -374,6 +462,11 @@ def step( sr = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) + # RH: Nominal location only updates for successful actions. 
Note that that drift + # action might succeed even if the "main" action fails + if sr.metadata["lastActionSuccess"]: + self.update_nominal_location(original_action_dict) + if self._verbose: print(self.controller.last_event) From c082a91f3ac1a4551f66bc5a710f9e4a1c84559c Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 3 May 2022 11:20:57 -0700 Subject: [PATCH 08/53] testing noisy motion, same sensors --- ...nav_emul_stretch_all_rooms_noisy_motion.py | 158 ++++++++++++++++++ .../stretch_ithor_arm_environment.py | 35 +++- 2 files changed, 185 insertions(+), 8 deletions(-) create mode 100644 manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py new file mode 100644 index 0000000..6170c31 --- /dev/null +++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion.py @@ -0,0 +1,158 @@ +import platform +import random + +import gym +import torch +from torch import nn + + +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_ithor_base import \ + StretchBringObjectiThorBaseConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_ddppo import \ + StretchBringObjectMixInPPOConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_simplegru import \ + StretchBringObjectMixInSimpleGRUConfig +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_pointnav_emul_model import StretchPointNavEmulModel +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_real_pointnav_model import StretchRealPointNavModel +from manipulathor_utils.debugger_util import ForkedPdb +from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, KITCHEN_TRAIN, LIVING_ROOM_TRAIN, \ + BEDROOM_TRAIN, ROBOTHOR_TRAIN, ROBOTHOR_VAL, BATHROOM_TEST, BATHROOM_TRAIN, BEDROOM_TEST, LIVING_ROOM_TEST, \ + KITCHEN_TEST +from utils.stretch_utils.stretch_bring_object_task_samplers import StretchDiverseBringObjectTaskSampler +from utils.stretch_utils.stretch_bring_object_tasks import StretchExploreWiseRewardTask, \ + StretchExploreWiseRewardTaskOnlyPickUp, StretchObjectNavTask +from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS, STRETCH_MANIPULATHOR_COMMIT_ID, INTEL_CAMERA_WIDTH +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchIntel, DepthSensorStretchIntel, \ + RGBSensorStretchKinect, DepthSensorStretchKinect, AgentBodyPointNavSensor, AgentBodyPointNavEmulSensor, \ + RGBSensorStretchKinectZero, \ + DepthSensorStretchKinectZero, IntelRawDepthSensor, ArmPointNavEmulSensor, KinectRawDepthSensor, \ + KinectNoisyObjectMask, IntelNoisyObjectMask, StretchPickedUpObjSensor +from utils.stretch_utils.stretch_visualizer import StretchBringObjImageVisualizer + + +class PointNavEmulStretchAllRooms( + StretchBringObjectiThorBaseConfig, + StretchBringObjectMixInPPOConfig, + StretchBringObjectMixInSimpleGRUConfig, +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + desired_screen_size = INTEL_CAMERA_WIDTH + NOISE_LEVEL = 0 + distance_thr = 1.5 # is this a good number? 
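    # distance_thr is reused twice below: it becomes the env's visibilityDistance in
    # __init__ and is handed to every noisy mask sensor, presumably as the cut-off
    # beyond which detections are discarded.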
+ + source_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_intel = IntelRawDepthSensor() + + source_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_kinect = KinectRawDepthSensor() + + + SENSORS = [ + RGBSensorStretchIntel( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + DepthSensorStretchIntel(height=desired_screen_size,width=desired_screen_size,use_normalization=True,uuid="depth_lowres",), + RGBSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres_arm", + ), + DepthSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_normalization=True, + uuid="depth_lowres_arm", + ), + StretchPickedUpObjSensor(), + AgentBodyPointNavEmulSensor(type='source', mask_sensor=source_mask_sensor_intel, depth_sensor=depth_sensor_intel), + AgentBodyPointNavEmulSensor(type='destination', mask_sensor=destination_mask_sensor_intel, depth_sensor=depth_sensor_intel), + ArmPointNavEmulSensor(type='source', mask_sensor=source_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + ArmPointNavEmulSensor(type='destination', mask_sensor=destination_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + source_mask_sensor_intel, + destination_mask_sensor_intel, + source_mask_sensor_kinect, + destination_mask_sensor_kinect, + + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = StretchDiverseBringObjectTaskSampler + TASK_TYPE = StretchExploreWiseRewardTask + + NUM_PROCESSES = 20 + + + TRAIN_SCENES = KITCHEN_TRAIN + LIVING_ROOM_TRAIN + BEDROOM_TRAIN + BATHROOM_TRAIN + TEST_SCENES = KITCHEN_TEST + LIVING_ROOM_TEST + BEDROOM_TEST + BATHROOM_TEST + OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ != 'robothor'])) + + + random.shuffle(TRAIN_SCENES) + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 + self.REWARD_CONFIG['object_found'] = 1 + # self.ENV_ARGS = STRETCH_ENV_ARGS + self.ENV_ARGS['visibilityDistance'] = self.distance_thr + self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID + self.ENV_ARGS['renderInstanceSegmentation'] = True + + self.ENV_ARGS['motion_noise_type'] = 'habitat' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 + + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensor): + sensor_type.device = 
torch.device(kwargs["devices"][0]) + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensor): + sensor_type.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensor): + sensor_type.device = torch.device(kwargs["devices"][0]) + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensor): + sensor_type.device = torch.device(kwargs["devices"][0]) + return sampler_args + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return StretchPointNavEmulModel( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index 19b2d2e..99d72e7 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py @@ -376,7 +376,7 @@ def step( 'action': 'Pass' } # we have to change the last action success if the pik up fails, we do that in the task now - elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT]: + elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT, ROTATE_RIGHT_SMALL, ROTATE_LEFT_SMALL]: copy_aditions = copy.deepcopy(ADITIONAL_ARM_ARGS) # RH: order matters, nominal action happens last @@ -429,6 +429,31 @@ def step( action_dict = dict() action_dict["action"] = "RotateAgent" action_dict["degrees"] = noise[2] - self.rotate_nominal + + elif action in [ROTATE_RIGHT_SMALL]: + # RH: Not scaling noise is deliberate. Small actions are harder to be accurate + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = dict() + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + self.rotate_nominal / 5 + + elif action in [ROTATE_LEFT_SMALL]: + # RH: Not scaling noise is deliberate. 
Small actions are harder to be accurate + noise = self.noise_model.get_rotate_drift() + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + + action_dict = dict() + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] - self.rotate_nominal / 5 + elif action in [MOVE_ARM_HEIGHT_P,MOVE_ARM_HEIGHT_M,MOVE_ARM_Z_P,MOVE_ARM_Z_M,]: base_position = get_relative_stretch_current_arm_state(self.controller) @@ -451,12 +476,6 @@ def step( action_dict = dict(action='RotateWristRelative', yaw=-WRIST_ROTATION / 5) elif action == MOVE_WRIST_M_SMALL: action_dict = dict(action='RotateWristRelative', yaw=WRIST_ROTATION / 5) - elif action == ROTATE_LEFT_SMALL: - action_dict["action"] = "RotateAgent" - action_dict["degrees"] = -AGENT_ROTATION_DEG / 5 - elif action == ROTATE_RIGHT_SMALL: - action_dict["action"] = "RotateAgent" - action_dict["degrees"] = AGENT_ROTATION_DEG / 5 sr = self.controller.step(action_dict) @@ -466,7 +485,7 @@ def step( # action might succeed even if the "main" action fails if sr.metadata["lastActionSuccess"]: self.update_nominal_location(original_action_dict) - + if self._verbose: print(self.controller.last_event) From 9e031b899cdd8603e43d6657bd450cad7ebbf5fc Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 5 May 2022 10:55:10 -0700 Subject: [PATCH 09/53] *pointnavemul stretch sensors with dead reckoning implemented for further testing --- ...h_all_rooms_noisy_motion_dead_reckoning.py | 158 +++++++++++ utils/calculation_utils.py | 36 ++- .../stretch_ithor_arm_environment.py | 26 +- utils/stretch_utils/stretch_thor_sensors.py | 255 +++++++++++++++++- 4 files changed, 462 insertions(+), 13 deletions(-) create mode 100644 manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py new file mode 100644 index 0000000..39abd4c --- /dev/null +++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py @@ -0,0 +1,158 @@ +import platform +import random + +import gym +import torch +from torch import nn + + +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_ithor_base import \ + StretchBringObjectiThorBaseConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_ddppo import \ + StretchBringObjectMixInPPOConfig +from manipulathor_baselines.stretch_bring_object_baselines.experiments.stretch_bring_object_mixin_simplegru import \ + StretchBringObjectMixInSimpleGRUConfig +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_pointnav_emul_model import StretchPointNavEmulModel +from manipulathor_baselines.stretch_bring_object_baselines.models.stretch_real_pointnav_model import StretchRealPointNavModel +from manipulathor_utils.debugger_util import ForkedPdb +from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, KITCHEN_TRAIN, LIVING_ROOM_TRAIN, \ + BEDROOM_TRAIN, ROBOTHOR_TRAIN, ROBOTHOR_VAL, BATHROOM_TEST, BATHROOM_TRAIN, BEDROOM_TEST, LIVING_ROOM_TEST, \ + KITCHEN_TEST +from 
utils.stretch_utils.stretch_bring_object_task_samplers import StretchDiverseBringObjectTaskSampler +from utils.stretch_utils.stretch_bring_object_tasks import StretchExploreWiseRewardTask, \ + StretchExploreWiseRewardTaskOnlyPickUp, StretchObjectNavTask +from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS, STRETCH_MANIPULATHOR_COMMIT_ID, INTEL_CAMERA_WIDTH +from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchIntel, DepthSensorStretchIntel, \ + RGBSensorStretchKinect, DepthSensorStretchKinect, AgentBodyPointNavSensor, AgentBodyPointNavEmulSensor, \ + RGBSensorStretchKinectZero, AgentBodyPointNavEmulSensorDeadReckoning, ArmPointNavEmulSensorDeadReckoning, \ + DepthSensorStretchKinectZero, IntelRawDepthSensor, ArmPointNavEmulSensor, KinectRawDepthSensor, \ + KinectNoisyObjectMask, IntelNoisyObjectMask, StretchPickedUpObjSensor +from utils.stretch_utils.stretch_visualizer import StretchBringObjImageVisualizer + + +class PointNavEmulStretchAllRooms( + StretchBringObjectiThorBaseConfig, + StretchBringObjectMixInPPOConfig, + StretchBringObjectMixInSimpleGRUConfig, +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + desired_screen_size = INTEL_CAMERA_WIDTH + NOISE_LEVEL = 0 + distance_thr = 1.5 # is this a good number? + + source_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_intel = IntelNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_intel = IntelRawDepthSensor() + + source_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='source', distance_thr=distance_thr, only_close_big_masks=True) + destination_mask_sensor_kinect = KinectNoisyObjectMask(height=desired_screen_size, width=desired_screen_size,noise=0, type='destination', distance_thr=distance_thr, only_close_big_masks=True) + depth_sensor_kinect = KinectRawDepthSensor() + + + SENSORS = [ + RGBSensorStretchIntel( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + DepthSensorStretchIntel(height=desired_screen_size,width=desired_screen_size,use_normalization=True,uuid="depth_lowres",), + RGBSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres_arm", + ), + DepthSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_normalization=True, + uuid="depth_lowres_arm", + ), + StretchPickedUpObjSensor(), + AgentBodyPointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor_intel, depth_sensor=depth_sensor_intel), + AgentBodyPointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor_intel, depth_sensor=depth_sensor_intel), + ArmPointNavEmulSensorDeadReckoning(type='source', mask_sensor=source_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + ArmPointNavEmulSensorDeadReckoning(type='destination', mask_sensor=destination_mask_sensor_kinect, depth_sensor=depth_sensor_kinect), + source_mask_sensor_intel, + destination_mask_sensor_intel, + source_mask_sensor_kinect, + destination_mask_sensor_kinect, + + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = StretchDiverseBringObjectTaskSampler + TASK_TYPE = StretchExploreWiseRewardTask + + NUM_PROCESSES 
= 20 + + + TRAIN_SCENES = KITCHEN_TRAIN + LIVING_ROOM_TRAIN + BEDROOM_TRAIN + BATHROOM_TRAIN + TEST_SCENES = KITCHEN_TEST + LIVING_ROOM_TEST + BEDROOM_TEST + BATHROOM_TEST + OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ != 'robothor'])) + + + random.shuffle(TRAIN_SCENES) + + def __init__(self): + super().__init__() + self.REWARD_CONFIG['exploration_reward'] = 0.1 + self.REWARD_CONFIG['object_found'] = 1 + # self.ENV_ARGS = STRETCH_ENV_ARGS + self.ENV_ARGS['visibilityDistance'] = self.distance_thr + self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID + self.ENV_ARGS['renderInstanceSegmentation'] = True + + self.ENV_ARGS['motion_noise_type'] = 'habitat' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 + + def test_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).test_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + + return sampler_args + + def train_task_sampler_args(self, **kwargs): + sampler_args = super(PointNavEmulStretchAllRooms, self).train_task_sampler_args(**kwargs) + if platform.system() == "Darwin": + pass + else: + + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, AgentBodyPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + for sensor_type in sampler_args['sensors']: + if isinstance(sensor_type, ArmPointNavEmulSensorDeadReckoning): + sensor_type.device = torch.device(kwargs["devices"][0]) + return sampler_args + + + @classmethod + def create_model(cls, **kwargs) -> nn.Module: + return StretchPointNavEmulModel( + action_space=gym.spaces.Discrete( + len(cls.TASK_TYPE.class_action_names()) + ), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + hidden_size=512, + visualize=cls.VISUALIZE + ) + + @classmethod + def tag(cls): + return cls.__name__ diff --git a/utils/calculation_utils.py b/utils/calculation_utils.py index 7b18731..a5e0696 100644 --- a/utils/calculation_utils.py +++ b/utils/calculation_utils.py @@ -1,7 +1,9 @@ import torch -from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import depth_frame_to_world_space_xyz +from allenact.embodiedai.mapping.mapping_utils.point_cloud_utils import depth_frame_to_world_space_xyz, camera_space_xyz_to_world_xyz +from utils.noise_in_motion_util import squeeze_bool_mask import numpy as np + def position_distance(s1, s2): position1 = s1["position"] position2 = s2["position"] @@ -27,4 +29,34 @@ def calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, horizon=camera_horizon, fov=fov, ) - return world_space_point_cloud \ No newline at end of file + return world_space_point_cloud + +def calc_world_xyz_from_agent_relative(object_xyz_list, agent_xyz, agent_rotation, device): + # This might just be a duplication/convenience thing of the above + with torch.no_grad(): + agent_xyz = 
(torch.from_numpy(agent_xyz).float().to(device)) + + object_xyz_matrix = np.ndarray([3,len(object_xyz_list)]) + for i in range(len(object_xyz_list)): + x,y,z = object_xyz_list[i]['position']['x'],object_xyz_list[i]['position']['y'],object_xyz_list[i]['position']['z'] + object_xyz_matrix[:,i] = [x,y,z] + object_xyz_matrix = torch.from_numpy(object_xyz_matrix).to(device) + + world_xyz_matrix = camera_space_xyz_to_world_xyz(object_xyz_matrix, agent_xyz, agent_rotation, horizon=0 ).numpy() + world_positions = [] + for i in range(len(object_xyz_list)): + x,y,z = world_xyz_matrix[:,i] + world_positions.append(dict(position=dict(x=x, y=y,z=z), rotation=dict(x=0,y=0, z=0))) + + return world_positions + +def get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device): + mask = squeeze_bool_mask(mask) + depth_frame_masked = depth_frame_original.copy() + depth_frame_masked[~mask] = -1 + depth_frame_masked[depth_frame_masked == 0] = -1 # TODO: what is this for? missing values? + world_space_point_cloud = calc_world_coordinates(min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, device, depth_frame_masked) + valid_points = (world_space_point_cloud == world_space_point_cloud).sum(dim=-1) == 3 + point_in_world = world_space_point_cloud[valid_points] + midpoint_agent_coord = point_in_world.mean(dim=0) + return midpoint_agent_coord diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index 99d72e7..fc0d79a 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py @@ -15,7 +15,6 @@ from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment from ithor_arm.ithor_arm_noise_models import NoiseInMotionHabitatFlavor, NoiseInMotionSimple1DNormal -from ithor_arm.arm_calculation_utils import convert_world_to_agent_coordinate from utils.stretch_utils.stretch_constants import ( ADITIONAL_ARM_ARGS, @@ -324,11 +323,18 @@ def update_nominal_location(self, action_dict): if action_dict['action'] is 'RotateLeft': new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal) % 360 - elif action_dict['action'] is 'RotateRight': + if action_dict['action'] is 'RotateLeftSmall': + new_loc["rotation"] = (new_loc["rotation"] - self.rotate_nominal/5) % 360 + if action_dict['action'] is 'RotateRight': new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal) % 360 + elif action_dict['action'] is 'RotateRightSmall': + new_loc["rotation"] = (new_loc["rotation"] + self.rotate_nominal/5) % 360 elif action_dict['action'] is 'MoveAhead': new_loc["x"] += self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) new_loc["z"] += self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) + elif action_dict['action'] is 'MoveBack': + new_loc["x"] -= self.ahead_nominal * np.sin(new_loc["rotation"] * np.pi / 180) + new_loc["z"] -= self.ahead_nominal * np.cos(new_loc["rotation"] * np.pi / 180) elif action_dict['action'] is 'TeleportFull': new_loc["x"] = action_dict['x'] new_loc["y"] = action_dict['y'] @@ -431,28 +437,28 @@ def step( action_dict["degrees"] = noise[2] - self.rotate_nominal elif action in [ROTATE_RIGHT_SMALL]: - # RH: Not scaling noise is deliberate. Small actions are harder to be accurate + # RH: lesser scaling noise is deliberate. 
Small actions are harder to be accurate noise = self.noise_model.get_rotate_drift() action_dict["action"] = "MoveAgent" - action_dict["ahead"] = noise[0] - action_dict["right"] = noise[1] + action_dict["ahead"] = noise[0]/2 + action_dict["right"] = noise[1]/2 sr = self.controller.step(action_dict) action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = noise[2] + self.rotate_nominal / 5 + action_dict["degrees"] = noise[2]/2 + self.rotate_nominal / 5 elif action in [ROTATE_LEFT_SMALL]: - # RH: Not scaling noise is deliberate. Small actions are harder to be accurate + # RH: lesser scaling noise is deliberate. Small actions are harder to be accurate noise = self.noise_model.get_rotate_drift() action_dict["action"] = "MoveAgent" - action_dict["ahead"] = noise[0] - action_dict["right"] = noise[1] + action_dict["ahead"] = noise[0]/2 + action_dict["right"] = noise[1]/2 sr = self.controller.step(action_dict) action_dict = dict() action_dict["action"] = "RotateAgent" - action_dict["degrees"] = noise[2] - self.rotate_nominal / 5 + action_dict["degrees"] = noise[2]/2 - self.rotate_nominal / 5 elif action in [MOVE_ARM_HEIGHT_P,MOVE_ARM_HEIGHT_M,MOVE_ARM_Z_P,MOVE_ARM_Z_M,]: diff --git a/utils/stretch_utils/stretch_thor_sensors.py b/utils/stretch_utils/stretch_thor_sensors.py index 0297d50..dd97fa0 100644 --- a/utils/stretch_utils/stretch_thor_sensors.py +++ b/utils/stretch_utils/stretch_thor_sensors.py @@ -25,12 +25,13 @@ # from ithor_arm.ithor_arm_environment import StretchManipulaTHOREnvironment # from ithor_arm.ithor_arm_sensors import DepthSensorThor # from ithor_arm.near_deadline_sensors import calc_world_coordinates -from utils.calculation_utils import calc_world_coordinates +from utils.calculation_utils import calc_world_coordinates, calc_world_xyz_from_agent_relative, get_mid_point_of_object_from_depth_and_mask from manipulathor_utils.debugger_util import ForkedPdb from utils.noise_in_motion_util import squeeze_bool_mask from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment from utils.stretch_utils.stretch_sim2real_utils import kinect_reshape, intel_reshape +from scripts.stretch_jupyter_helper import get_relative_stretch_current_arm_state @@ -418,6 +419,258 @@ def average_so_far(self, camera_xyz, camera_rotation, arm_state, current_step_nu return agent_centric_middle_of_object + + +class AgentBodyPointNavEmulSensorDeadReckoning(Sensor): + + def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): + observation_space = gym.spaces.Box( + low=0, high=1, shape=(1,), dtype=np.float32 + ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) + self.type = type + self.mask_sensor = mask_sensor + self.depth_sensor = depth_sensor + uuid = '{}_{}'.format(uuid, type) + + self.min_xyz = np.zeros((3)) + self.dummy_answer = torch.zeros(3) + self.dummy_answer[:] = 4 # is this good enough? 
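        # dummy_answer is the fallback observation: history_aggregation below returns it
        # for as long as the target has never been detected, and 4 m on every axis is
        # presumably meant to read to the policy as 'far away / not yet seen'.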
+ self.device = torch.device("cpu") + + super().__init__(**prepare_locals_for_super(locals())) + + + def get_relative_nominal_locations(self,env): + if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1: + print('Warning multiple cameras') + + true_base = env.get_agent_location() + true_base = copy.deepcopy(dict(position=dict(x=true_base['x'], y=true_base['y'],z=true_base['z']), + rotation=dict(x=0,y=true_base['rotation'], z=0))) + + nominal_base = env.nominal_agent_location + nominal_base_xyz = np.array([nominal_base['x'],nominal_base['y'],nominal_base['z']]) + nominal_base = copy.deepcopy(dict(position=dict(x=nominal_base['x'], y=nominal_base['y'],z=nominal_base['z']), + rotation=dict(x=0,y=nominal_base['rotation'], z=0))) + + m = copy.deepcopy(env.controller.last_event.metadata) + + wrist = env.get_absolute_hand_state() + relative_wrist = convert_world_to_agent_coordinate(wrist,true_base) + + camera1 = m["cameraPosition"] + camera1 = dict(position=dict(x=camera1['x'], y=camera1['y'],z=camera1['z']), rotation=dict(x=0,y=0, z=0)) + relative_camera_1 = convert_world_to_agent_coordinate(camera1,true_base) + + camera2 = m['thirdPartyCameras'][0] + relative_camera_2 = convert_world_to_agent_coordinate(camera2,true_base) + + relative_list = [relative_wrist,relative_camera_1,relative_camera_2] + nominal_list = calc_world_xyz_from_agent_relative(relative_list,nominal_base_xyz,nominal_base['rotation']['y'],self.device) + + return dict(agent=nominal_base, wrist=nominal_list[0],camera1=nominal_list[1],camera2=nominal_list[2]) + + def get_agent_belief_state(self,env): + fov = env.controller.last_event.metadata['fov'] # assumption: real one is fine here + real_agent_state = env.get_agent_location() + nominal_positions = self.get_relative_nominal_locations(env) + + belief_camera_horizon = env.controller.last_event.metadata["agent"]["cameraHorizon"] # assumption: real one is fine here + belief_camera_xyz = np.array([nominal_positions['camera1']['position'][k] for k in ['x','y','z']]) + belief_camera_rotation = (nominal_positions['agent']['rotation']['y']) % 360 + + real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']]) + + self.belief_prev_location.append(belief_camera_xyz) + self.real_prev_location.append(real_camera_xyz) + arm_state = nominal_positions['wrist']# get_relative_stretch_current_arm_state(env.controller) + + return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state + + def get_observation( + self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any + ) -> Any: + + # mask = squeeze_bool_mask(self.mask_sensor.get_observation(env, task, *args, **kwargs)) + mask = self.mask_sensor.get_observation(env, task, *args, **kwargs) + depth_frame = self.depth_sensor.get_observation(env, task, *args, **kwargs) + if task.num_steps_taken() == 0: + self.pointnav_history_aggr = [] + self.real_prev_location = [] + self.belief_prev_location = [] + + fov, camera_horizon, camera_xyz, camera_rotation, arm_state = self.get_agent_belief_state(env) + + if mask.sum() != 0: + midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) + self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + + # if len(self.belief_prev_location) > 190: + # import matplotlib + # matplotlib.use('TkAgg') + # import matplotlib.pyplot as plt + # fig = plt.figure() + # ax = fig.add_subplot(projection='3d') + # def 
draw_points(locations, color): + # xs = [x[0] for x in locations] + # ys = [x[1] for x in locations] + # zs = [x[2] for x in locations] + # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) + + # def draw(locations, color): + # xs = [x['camera_xyz'][0] for x in locations] + # ys = [x['camera_xyz'][1] for x in locations] + # zs = [x['camera_xyz'][2] for x in locations] + # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) + + # draw_points(self.real_prev_location, 'g'); draw_points(self.belief_prev_location, 'b'); plt.show() + # ForkedPdb().set_trace() + + return self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken()) + + def history_aggregation(self, camera_xyz, camera_rotation, current_step_number): + if len(self.pointnav_history_aggr) == 0: + return self.dummy_answer + else: + weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_weights = sum(weights) + total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_sum = sum(total_sum) + midpoint = total_sum / total_weights + agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) + midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) + midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + + return midpoint_agent_coord.cpu() + + +class ArmPointNavEmulSensorDeadReckoning(Sensor): + + def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any): + observation_space = gym.spaces.Box( + low=0, high=1, shape=(1,), dtype=np.float32 + ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) + self.type = type + self.mask_sensor = mask_sensor + self.depth_sensor = depth_sensor + uuid = '{}_{}'.format(uuid, type) + + self.min_xyz = np.zeros((3)) + + self.dummy_answer = torch.zeros(3) + self.dummy_answer[:] = 4 # is this good enough? 
+ self.device = torch.device("cpu") + super().__init__(**prepare_locals_for_super(locals())) + + def get_relative_nominal_locations(self,env): + if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1: + print('Warning multiple cameras') + + true_base = env.get_agent_location() + true_base = copy.deepcopy(dict(position=dict(x=true_base['x'], y=true_base['y'],z=true_base['z']), + rotation=dict(x=0,y=true_base['rotation'], z=0))) + + nominal_base = env.nominal_agent_location + nominal_base_xyz = np.array([nominal_base['x'],nominal_base['y'],nominal_base['z']]) + nominal_base = copy.deepcopy(dict(position=dict(x=nominal_base['x'], y=nominal_base['y'],z=nominal_base['z']), + rotation=dict(x=0,y=nominal_base['rotation'], z=0))) + + m = copy.deepcopy(env.controller.last_event.metadata) + + wrist = env.get_absolute_hand_state() + relative_wrist = convert_world_to_agent_coordinate(wrist,true_base) + + camera1 = m["cameraPosition"] + camera1 = dict(position=dict(x=camera1['x'], y=camera1['y'],z=camera1['z']), rotation=dict(x=0,y=0, z=0)) + relative_camera_1 = convert_world_to_agent_coordinate(camera1,true_base) + + camera2 = m['thirdPartyCameras'][0] + relative_camera_2 = convert_world_to_agent_coordinate(camera2,true_base) + + relative_list = [relative_wrist,relative_camera_1,relative_camera_2] + nominal_list = calc_world_xyz_from_agent_relative(relative_list,nominal_base_xyz,nominal_base['rotation']['y'],self.device) + + return dict(agent=nominal_base, wrist=nominal_list[0],camera1=nominal_list[1],camera2=nominal_list[2]) + + def get_agent_belief_state(self,env): + real_agent_state = env.get_agent_location() + nominal_positions = self.get_relative_nominal_locations(env) + + + belief_camera_xyz = np.array([nominal_positions['camera2']['position'][k] for k in ['x','y','z']]) + belief_camera_rotation = (nominal_positions['agent']['rotation']['y'] + 90) % 360 + + real_camera_xyz = np.array([real_agent_state[k] for k in ['x','y','z']]) + + metadata = copy.deepcopy(env.controller.last_event.metadata['thirdPartyCameras'][0]) + belief_camera_horizon = metadata['rotation']['x'] # assumption: real one is fine here + fov = metadata['fieldOfView'] # assumption: real one is fine here + assert abs(metadata['rotation']['z'] - 0) < 0.1 #TODO: why + + self.belief_prev_location.append(belief_camera_xyz) + self.real_prev_location.append(real_camera_xyz) + arm_state = nominal_positions['wrist']# get_relative_stretch_current_arm_state(env.controller) + + return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state + + def get_accurate_locations(self, env): + if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1: + print('Warning multiple cameras') + metadata = copy.deepcopy(env.controller.last_event.metadata['thirdPartyCameras'][0]) + # ForkedPdb().set_trace() + camera_xyz = np.array([metadata["position"][k] for k in ["x", "y", "z"]]) + # camera_rotation = np.array([metadata["rotation"][k] for k in ["x", "y", "z"]]) + camera_rotation = metadata['rotation']['y'] + camera_horizon = metadata['rotation']['x'] + assert abs(metadata['rotation']['z'] - 0) < 0.1 + arm_state = env.get_absolute_hand_state() + fov = metadata['fieldOfView'] + return dict(camera_xyz=camera_xyz, camera_rotation=camera_rotation, camera_horizon=camera_horizon, arm_state=arm_state, fov=fov) + + def get_observation( + self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any + ) -> Any: + + mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) + depth_frame = 
self.depth_sensor.get_observation(env, task, *args, **kwargs) + + if task.num_steps_taken() == 0: + self.pointnav_history_aggr = [] + self.real_prev_location = [] + self.belief_prev_location = [] + + fov, camera_horizon, camera_xyz, camera_rotation, arm_state = self.get_agent_belief_state(env) + + if mask.sum() != 0: + midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) + self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + + return self.history_aggregation(camera_xyz, camera_rotation, arm_state, task.num_steps_taken()) + + def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number): + if len(self.pointnav_history_aggr) == 0: + return self.dummy_answer + else: + weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_weights = sum(weights) + total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] + total_sum = sum(total_sum) + midpoint = total_sum / total_weights + agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) + midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) + midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + + arm_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) + arm_state_agent_coord = torch.Tensor([arm_agent_coord['position'][k] for k in ['x','y','z']]) + # ForkedPdb().set_trace() + + distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord + + return distance_in_agent_coord.cpu() + + # TODO we have to rewrite the noisy movement experiment ones? 
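
Both dead-reckoning sensors aggregate their detection history the same way: a detection recorded at step t_i contributes weight 1/(t + 1 - t_i) at the current step t, so the reported midpoint is a weighted mean that favors recent sightings without ever discarding older ones. A standalone sketch of that reduction (illustrative names, not the class method itself):

def aggregate_midpoints(history, current_step):
    # history: list of (midpoint_xyz, num_pixels, step_taken) tuples, as appended above.
    if len(history) == 0:
        return None  # the sensors return their dummy_answer in this case
    weights = [1.0 / (current_step + 1 - step) for _, _, step in history]
    weighted_sum = sum(mid * w for (mid, _, _), w in zip(history, weights))
    return weighted_sum / sum(weights)

# Example: detections at steps 2 and 9, queried at step 10, get weights 1/9 and 1/2,
# so the newer detection dominates the mean.
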
# class AgentGTLocationSensor(Sensor): # From 39e325cf999574e407199d3eaeafff1e239f64d2 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 5 May 2022 16:36:53 -0700 Subject: [PATCH 10/53] functional training for *pointnavemul dead reckoning sensors on stretch with noisy motion --- ...h_all_rooms_noisy_motion_dead_reckoning.py | 2 +- .../models/stretch_pointnav_emul_model.py | 1 + utils/calculation_utils.py | 2 +- utils/stretch_utils/stretch_thor_sensors.py | 47 ++++--------------- 4 files changed, 11 insertions(+), 41 deletions(-) diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py index 39abd4c..1ce18f1 100644 --- a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py +++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py @@ -110,7 +110,7 @@ def __init__(self): self.ENV_ARGS['motion_noise_args'] = dict() self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] - self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 def test_task_sampler_args(self, **kwargs): sampler_args = super(PointNavEmulStretchAllRooms, self).test_task_sampler_args(**kwargs) diff --git a/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py b/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py index 47290bf..0e63d26 100644 --- a/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py +++ b/manipulathor_baselines/stretch_bring_object_baselines/models/stretch_pointnav_emul_model.py @@ -182,6 +182,7 @@ def forward( # type:ignore visual_observation_arm = torch.cat([observations['depth_lowres_arm'], observations['rgb_lowres_arm'], arm_mask], dim=-1).float() visual_observation_arm = check_for_nan_visual_observations(visual_observation_arm) visual_observation_encoding_arm = compute_cnn_output(self.full_visual_encoder_arm, visual_observation_arm) + # ForkedPdb().set_trace() # arm_distance_to_obj_source = observations['point_nav_real_source'].copy() diff --git a/utils/calculation_utils.py b/utils/calculation_utils.py index a5e0696..f01693d 100644 --- a/utils/calculation_utils.py +++ b/utils/calculation_utils.py @@ -42,7 +42,7 @@ def calc_world_xyz_from_agent_relative(object_xyz_list, agent_xyz, agent_rotatio object_xyz_matrix[:,i] = [x,y,z] object_xyz_matrix = torch.from_numpy(object_xyz_matrix).to(device) - world_xyz_matrix = camera_space_xyz_to_world_xyz(object_xyz_matrix, agent_xyz, agent_rotation, horizon=0 ).numpy() + world_xyz_matrix = camera_space_xyz_to_world_xyz(object_xyz_matrix, agent_xyz, agent_rotation, horizon=0 ).cpu().numpy() world_positions = [] for i in range(len(object_xyz_list)): x,y,z = world_xyz_matrix[:,i] diff --git a/utils/stretch_utils/stretch_thor_sensors.py b/utils/stretch_utils/stretch_thor_sensors.py index dd97fa0..33c7faf 100644 --- a/utils/stretch_utils/stretch_thor_sensors.py +++ b/utils/stretch_utils/stretch_thor_sensors.py @@ -491,7 +491,6 @@ def get_observation( self, env: IThorEnvironment, task: Task, *args: Any, 
**kwargs: Any ) -> Any: - # mask = squeeze_bool_mask(self.mask_sensor.get_observation(env, task, *args, **kwargs)) mask = self.mask_sensor.get_observation(env, task, *args, **kwargs) depth_frame = self.depth_sensor.get_observation(env, task, *args, **kwargs) if task.num_steps_taken() == 0: @@ -505,27 +504,6 @@ def get_observation( midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) - # if len(self.belief_prev_location) > 190: - # import matplotlib - # matplotlib.use('TkAgg') - # import matplotlib.pyplot as plt - # fig = plt.figure() - # ax = fig.add_subplot(projection='3d') - # def draw_points(locations, color): - # xs = [x[0] for x in locations] - # ys = [x[1] for x in locations] - # zs = [x[2] for x in locations] - # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) - - # def draw(locations, color): - # xs = [x['camera_xyz'][0] for x in locations] - # ys = [x['camera_xyz'][1] for x in locations] - # zs = [x['camera_xyz'][2] for x in locations] - # ax.plot(xs, zs, ys, marker='o' if color=='g' else 'x', color=color) - - # draw_points(self.real_prev_location, 'g'); draw_points(self.belief_prev_location, 'b'); plt.show() - # ForkedPdb().set_trace() - return self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken()) def history_aggregation(self, camera_xyz, camera_rotation, current_step_number): @@ -614,26 +592,18 @@ def get_agent_belief_state(self,env): return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state - def get_accurate_locations(self, env): - if len(env.controller.last_event.metadata['thirdPartyCameras']) != 1: - print('Warning multiple cameras') - metadata = copy.deepcopy(env.controller.last_event.metadata['thirdPartyCameras'][0]) - # ForkedPdb().set_trace() - camera_xyz = np.array([metadata["position"][k] for k in ["x", "y", "z"]]) - # camera_rotation = np.array([metadata["rotation"][k] for k in ["x", "y", "z"]]) - camera_rotation = metadata['rotation']['y'] - camera_horizon = metadata['rotation']['x'] - assert abs(metadata['rotation']['z'] - 0) < 0.1 - arm_state = env.get_absolute_hand_state() - fov = metadata['fieldOfView'] - return dict(camera_xyz=camera_xyz, camera_rotation=camera_rotation, camera_horizon=camera_horizon, arm_state=arm_state, fov=fov) - def get_observation( self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any ) -> Any: - mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) + mask = self.mask_sensor.get_observation(env, task, *args, **kwargs) depth_frame = self.depth_sensor.get_observation(env, task, *args, **kwargs) + + # catch rare NaN error where tiny mask is lost in 0 missing values + squeeze_mask = squeeze_bool_mask(mask) + depth_frame_masked = depth_frame.copy() + depth_frame_masked[~squeeze_mask] = -1 + depth_frame_masked[depth_frame_masked == 0] = -1 if task.num_steps_taken() == 0: self.pointnav_history_aggr = [] @@ -642,7 +612,7 @@ def get_observation( fov, camera_horizon, camera_xyz, camera_rotation, arm_state = self.get_agent_belief_state(env) - if mask.sum() != 0: + if depth_frame_masked[depth_frame_masked>0].sum() != 0: midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, 
task.num_steps_taken())) @@ -664,7 +634,6 @@ def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, curr arm_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) arm_state_agent_coord = torch.Tensor([arm_agent_coord['position'][k] for k in ['x','y','z']]) - # ForkedPdb().set_trace() distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord From 8e1f5f03a66e10eb9699fe051ec5c0e7a41c1b9a Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 6 May 2022 13:34:49 -0700 Subject: [PATCH 11/53] add random motion for failed nominal action and increse failed action penalty - hope better for real --- ...h_all_rooms_noisy_motion_dead_reckoning.py | 1 + .../stretch_ithor_arm_environment.py | 35 +++++++++++++------ utils/stretch_utils/stretch_thor_sensors.py | 4 +-- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py index 1ce18f1..8f9ecec 100644 --- a/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py +++ b/manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/pointnav_emul_stretch_all_rooms_noisy_motion_dead_reckoning.py @@ -101,6 +101,7 @@ def __init__(self): super().__init__() self.REWARD_CONFIG['exploration_reward'] = 0.1 self.REWARD_CONFIG['object_found'] = 1 + self.REWARD_CONFIG['failed_action_penalty'] = -0.06 # double the current - stretch really likes to back up into things # self.ENV_ARGS = STRETCH_ENV_ARGS self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index fc0d79a..45764e0 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py @@ -387,6 +387,7 @@ def step( # RH: order matters, nominal action happens last action_dict = {**action_dict, **copy_aditions} + # ForkedPdb().set_trace() if action in [MOVE_AHEAD]: noise = self.noise_model.get_ahead_drift(self.ahead_nominal) @@ -394,7 +395,7 @@ def step( action_dict["degrees"] = noise[2] sr = self.controller.step(action_dict) - action_dict = dict() + action_dict = {**copy_aditions} action_dict["action"] = "MoveAgent" action_dict["ahead"] = noise[0] + self.ahead_nominal action_dict["right"] = noise[1] @@ -408,7 +409,7 @@ def step( action_dict["degrees"] = noise[2] sr = self.controller.step(action_dict) - action_dict = dict() + action_dict = {**copy_aditions} action_dict["action"] = "MoveAgent" action_dict["ahead"] = noise[0] - self.ahead_nominal action_dict["right"] = noise[1] @@ -421,7 +422,7 @@ def step( action_dict["right"] = noise[1] sr = self.controller.step(action_dict) - action_dict = dict() + action_dict = {**copy_aditions} action_dict["action"] = "RotateAgent" action_dict["degrees"] = noise[2] + self.rotate_nominal @@ -432,7 +433,7 @@ def step( action_dict["right"] = noise[1] sr = self.controller.step(action_dict) - action_dict = dict() + action_dict = {**copy_aditions} action_dict["action"] = "RotateAgent" action_dict["degrees"] = noise[2] - self.rotate_nominal @@ -444,7 +445,7 @@ def step( action_dict["right"] = noise[1]/2 sr = 
self.controller.step(action_dict) - action_dict = dict() + action_dict = {**copy_aditions} action_dict["action"] = "RotateAgent" action_dict["degrees"] = noise[2]/2 + self.rotate_nominal / 5 @@ -456,7 +457,7 @@ def step( action_dict["right"] = noise[1]/2 sr = self.controller.step(action_dict) - action_dict = dict() + action_dict = {**copy_aditions} action_dict["action"] = "RotateAgent" action_dict["degrees"] = noise[2]/2 - self.rotate_nominal / 5 @@ -484,14 +485,14 @@ def step( action_dict = dict(action='RotateWristRelative', yaw=WRIST_ROTATION / 5) - sr = self.controller.step(action_dict) + sr_nominal = self.controller.step(action_dict) self.list_of_actions_so_far.append(action_dict) # RH: Nominal location only updates for successful actions. Note that that drift # action might succeed even if the "main" action fails - if sr.metadata["lastActionSuccess"]: + if sr_nominal.metadata["lastActionSuccess"]: self.update_nominal_location(original_action_dict) - + if self._verbose: print(self.controller.last_event) @@ -502,4 +503,18 @@ def step( assert last_frame is not None self.last_event.frame = last_frame - return sr + if not sr_nominal.metadata["lastActionSuccess"] and action in [MOVE_AHEAD, MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT, ROTATE_RIGHT_SMALL, ROTATE_LEFT_SMALL]: + # if the action fails, sample the noise model for a turn + # does this mess up metadata? and is this reasonable? what action failure modes happen in sim vs real? + noise = self.noise_model.get_rotate_drift() + action_dict = {**copy.deepcopy(ADITIONAL_ARM_ARGS)} + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = noise[0] + action_dict["right"] = noise[1] + sr = self.controller.step(action_dict) + action_dict = {**copy.deepcopy(ADITIONAL_ARM_ARGS)} + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = noise[2] + sr = self.controller.step(action_dict) + + return sr_nominal diff --git a/utils/stretch_utils/stretch_thor_sensors.py b/utils/stretch_utils/stretch_thor_sensors.py index 33c7faf..c7540dc 100644 --- a/utils/stretch_utils/stretch_thor_sensors.py +++ b/utils/stretch_utils/stretch_thor_sensors.py @@ -488,7 +488,7 @@ def get_agent_belief_state(self,env): return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state def get_observation( - self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any + self, env: StretchManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any ) -> Any: mask = self.mask_sensor.get_observation(env, task, *args, **kwargs) @@ -593,7 +593,7 @@ def get_agent_belief_state(self,env): return fov, belief_camera_horizon, belief_camera_xyz, belief_camera_rotation, arm_state def get_observation( - self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any + self, env: StretchManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any ) -> Any: mask = self.mask_sensor.get_observation(env, task, *args, **kwargs) From 40060a17b4bb23f42309faf963a97251ba6d75ed Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 24 May 2022 10:41:41 -0700 Subject: [PATCH 12/53] objnav config and sampler refactor 1 - did not work --- .../experiments/bring_object_thor_base.py | 2 + .../ithor/obj_nav_2camera_procthor_wide.py | 21 +- .../experiments/ithor/obj_nav_base_config.py | 120 +++++++++ .../experiments/ithor/obj_nav_for_procthor.py | 250 ++++++++++++------ ...nav_for_procthor_clip_resnet50_rgb_only.py | 36 +-- .../procthor_object_nav_task_samplers.py | 128 ++++----- .../procthor_object_nav_tasks.py | 7 +- utils/stretch_utils/stretch_visualizer.py | 17 +- 
8 files changed, 373 insertions(+), 208 deletions(-) create mode 100644 manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py diff --git a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py index b310a12..3ed619f 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py @@ -129,6 +129,8 @@ def make_sampler_fn(cls, **kwargs) -> TaskSampler: viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS ] kwargs["visualizers"] = visualizers + + ForkedPdb().set_trace() kwargs["objects"] = cls.OBJECT_TYPES kwargs["task_type"] = cls.TASK_TYPE kwargs["exp_name"] = exp_name_w_time diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py index 2c042d8..5e3cfbf 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py @@ -101,33 +101,22 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( NUM_PROCESSES = 50 - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999) if i not in PROCTHOR_INVALID_SCENES] # 9999 is all of train - if platform.system() == "Darwin": - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - random.shuffle(TRAIN_SCENES) + NUM_PROCESSES = 40 + # NUM_TRAIN_HOUSES = 40 def __init__(self): super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) + self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - #TODO depth is not False. 
Kiana added this: self.ENV_ARGS['renderInstanceSegmentation'] = False - self.ENV_ARGS['renderDepthImage'] = False + self.ENV_ARGS['renderDepthImage'] = False self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = False #TODO important change everywhere + self.ENV_ARGS['allow_flipping'] = False @classmethod diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py new file mode 100644 index 0000000..e59a3b6 --- /dev/null +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py @@ -0,0 +1,120 @@ +from typing import Sequence + +import torch +import torch.optim as optim + +# from ai2thor.platform import CloudRendering +# from allenact.embodiedai.sensors.vision_sensors import DepthSensor +from manipulathor_baselines.bring_object_baselines.experiments.bring_object_thor_base import BringObjectThorBaseConfig +from allenact.utils.experiment_utils import ( + Builder, + PipelineStage, + TrainingPipeline, + TrainingSettings, +) +from allenact.algorithms.onpolicy_sync.losses import PPO +from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig + +from allenact.base_abstractions.sensor import Sensor + +from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler +from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask +from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID + + +class ObjectNavBaseConfig(BringObjectThorBaseConfig): + """The base config for ProcTHOR ObjectNav experiments.""" + + TASK_SAMPLER = AllRoomsObjectNavTaskSampler + TASK_TYPE = ObjectNavTask + + TRAIN_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + VAL_DEVICES = ( + (torch.cuda.device_count() - 1,) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + TEST_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + + SENSORS: Sequence[Sensor] = [] + + DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" + + def __init__(self): + super().__init__() + self.REWARD_CONFIG = { + "step_penalty": -0.01, + "goal_success_reward": 10.0, + "reached_horizon_reward": 0.0, + "failed_stop_reward": 0.0, + "shaping_weight": 1.0, + "positive_only_reward": False, + } + + def training_pipeline(self, **kwargs): + log_interval_small = ( + self.NUM_PROCESSES * 32 * 10 + if torch.cuda.is_available + else 1 + ) + log_interval_medium = ( + self.NUM_PROCESSES * 64 * 5 + if torch.cuda.is_available + else 1 + ) + log_interval_large = ( + self.NUM_PROCESSES * 128 * 5 + if torch.cuda.is_available + else 1 + ) + + batch_steps_0 = int(10e6) + batch_steps_1 = int(10e6) + batch_steps_2 = int(1e9) - batch_steps_1 - batch_steps_0 + + return TrainingPipeline( + save_interval=5_000_000, + metric_accumulate_interval=10_000, + optimizer_builder=Builder(optim.Adam, dict(lr=0.0003)), + num_mini_batch=1, + update_repeats=4, + max_grad_norm=0.5, + num_steps=128, + named_losses={"ppo_loss": PPO(**PPOConfig)}, + gamma=0.99, + use_gae=True, + gae_lambda=0.95, + advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, + pipeline_stages=[ + PipelineStage( + loss_names=["ppo_loss"], + max_stage_steps=batch_steps_0, + training_settings=TrainingSettings( + num_steps=32, metric_accumulate_interval=log_interval_small + ), + ), + PipelineStage( + loss_names=["ppo_loss"], + 
max_stage_steps=batch_steps_1, + training_settings=TrainingSettings( + num_steps=64, metric_accumulate_interval=log_interval_medium + ), + ), + PipelineStage( + loss_names=["ppo_loss"], + max_stage_steps=batch_steps_2, + training_settings=TrainingSettings( + num_steps=128, metric_accumulate_interval=log_interval_large + ), + ), + ], + ) + diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py index 74a0fef..364be57 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py @@ -3,38 +3,42 @@ import datasets import numpy as np +from math import ceil import torch -import torch.optim as optim -from ai2thor.platform import CloudRendering +from allenact.utils.system import get_logger +from allenact.base_abstractions.preprocessor import ( + Preprocessor, + SensorPreprocessorGraph, +) +from allenact.base_abstractions.sensor import ( + Sensor, + Union, +) +from allenact.base_abstractions.task import TaskSampler from allenact.embodiedai.sensors.vision_sensors import DepthSensor -from manipulathor_baselines.bring_object_baselines.experiments.bring_object_thor_base import BringObjectThorBaseConfig from allenact.utils.experiment_utils import ( Builder, - PipelineStage, - TrainingPipeline, - TrainingSettings, ) -from allenact.algorithms.onpolicy_sync.losses import PPO -from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig -from allenact.base_abstractions.sensor import Sensor - -# from allenact.utils.system import get_logger -# from training import cfg -# from training.tasks.object_nav import ObjectNavTaskSampler -# from training.utils.types import RewardConfig, TaskSamplerArgs +from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_base_config import ObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask +from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID +from manipulathor_utils.debugger_util import ForkedPdb + -class ProcTHORObjectNavBaseConfig(BringObjectThorBaseConfig): + +class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): """The base config for ProcTHOR ObjectNav experiments.""" TASK_SAMPLER = ProcTHORObjectNavTaskSampler TASK_TYPE = ObjectNavTask + OBJECT_TYPES: Optional[Sequence[str]] + TRAIN_DEVICES = ( tuple(range(torch.cuda.device_count())) if torch.cuda.is_available() @@ -51,24 +55,12 @@ class ProcTHORObjectNavBaseConfig(BringObjectThorBaseConfig): else (torch.device("cpu"),) ) - # TEST_ON_VALIDATION: bool = cfg.evaluation.test_on_validation - - # AGENT_MODE = cfg.agent.agent_mode - # CAMERA_WIDTH = cfg.agent.camera_width - # CAMERA_HEIGHT = cfg.agent.camera_height - # STEP_SIZE = cfg.agent.step_size - # VISIBILITY_DISTANCE = cfg.agent.visibility_distance - # FIELD_OF_VIEW = cfg.agent.field_of_view - # ROTATE_STEP_DEGREES = cfg.agent.rotate_step_degrees - - # HOUSE_DATASET = datasets.load_dataset( - # "allenai/houses", use_auth_token=True, ignore_verifications=True - # ) - SENSORS: Sequence[Sensor] = [] DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" + CAP_TRAINING = None + ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[ int ] = 20 # default config/main.yaml @@ -76,64 +68,152 @@ class 
ProcTHORObjectNavBaseConfig(BringObjectThorBaseConfig): -1 ) # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None` RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # TODO apparently this won't work with 100 (why?) - - @final - def training_pipeline(self, **kwargs): - log_interval_small = ( - self.NUM_PROCESSES * 32 * 10 - if torch.cuda.is_available - else 1 + + HOUSE_DATASET = datasets.load_dataset( + "allenai/houses", use_auth_token=True, ignore_verifications=True + ) + + MAX_STEPS = 500 + NUM_TRAIN_HOUSES = None # none means all + + + @classmethod + def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return tuple() + + @classmethod + def make_sampler_fn( + cls, **kwargs + ) -> TaskSampler: + from datetime import datetime + + now = datetime.now() + + exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f") + if cls.VISUALIZE: + visualizers = [ + viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS + ] + kwargs["visualizers"] = visualizers + kwargs["task_type"] = cls.TASK_TYPE + kwargs["cap_training"] = cls.CAP_TRAINING + kwargs["exp_name"] = exp_name_w_time + + return cls.TASK_SAMPLER(**kwargs) + + @staticmethod + def _partition_inds(n: int, num_parts: int): + return np.round( + np.linspace(start=0, stop=n, num=num_parts + 1, endpoint=True) + ).astype(np.int32) + + def _get_sampler_args_for_scene_split( + self, + houses: datasets.Dataset, + mode: Literal["train", "eval"], + resample_same_scene_freq: int, + allow_oversample: bool, + allow_flipping: bool, + process_ind: int, + total_processes: int, + max_tasks: Optional[int], + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + extra_controller_args: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + # NOTE: oversample some scenes -> bias + oversample_warning = ( + f"Warning: oversampling some of the houses ({houses}) to feed all processes ({total_processes})." + " You can avoid this by setting a number of workers divisible by the number of scenes" ) - log_interval_medium = ( - self.NUM_PROCESSES * 64 * 5 - if torch.cuda.is_available - else 1 + house_inds = list(range(len(houses))) + if total_processes > len(houses): + if not allow_oversample: + raise RuntimeError( + f"Cannot have `total_processes > len(houses)`" + f" ({total_processes} > {len(houses)}) when `allow_oversample` is `False`." 
+ ) + + if total_processes % len(houses) != 0: + get_logger().warning(oversample_warning) + house_inds = house_inds * ceil(total_processes / len(houses)) + house_inds = house_inds[ + : total_processes * (len(house_inds) // total_processes) + ] + elif len(houses) % total_processes != 0: + get_logger().warning(oversample_warning) + + inds = self._partition_inds(len(house_inds), total_processes) + house_inds = house_inds[inds[process_ind] : inds[process_ind + 1]] + house_inds = [h for h in house_inds if h not in PROCTHOR_INVALID_SCENES] + + return { + "mode": mode, + "houses": houses, + "house_inds": house_inds, + "process_ind": process_ind, + "env_args": self.ENV_ARGS, + "max_steps": self.MAX_STEPS, + "sensors": self.SENSORS, + "seed": seeds[process_ind] if seeds is not None else None, + "deterministic_cudnn": deterministic_cudnn, + "rewards_config": self.REWARD_CONFIG, + "target_object_types": self.OBJECT_TYPES, + "max_tasks": max_tasks if max_tasks is not None else len(house_inds), + "allow_flipping": allow_flipping, + "distance_type": self.DISTANCE_TYPE, + "resample_same_scene_freq": resample_same_scene_freq, + "scene_name": "Procedural" + + } + + def train_task_sampler_args(self, **kwargs) -> Dict[str, Any]: + train_houses = self.HOUSE_DATASET["train"] + if self.NUM_TRAIN_HOUSES: + train_houses = train_houses.select(range(self.NUM_TRAIN_HOUSES)) + + out = self._get_sampler_args_for_scene_split( + houses=train_houses, + mode="train", + allow_oversample=True, + max_tasks=float("inf"), + allow_flipping=self.ENV_ARGS['allow_flipping'], + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN, + # extra_controller_args=dict(branch="nanna", scene="Procedural"), + **kwargs, ) - log_interval_large = ( - self.NUM_PROCESSES * 128 * 5 - if torch.cuda.is_available - else 1 + return out + + def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: + val_houses = self.HOUSE_DATASET["validation"] + out = self._get_sampler_args_for_scene_split( + houses=val_houses.select(range(100)), + mode="eval", + allow_oversample=False, + max_tasks=10, + allow_flipping=False, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, + # extra_controller_args=dict(scene="Procedural"), + **kwargs, ) - - batch_steps_0 = int(10e6) - batch_steps_1 = int(10e6) - batch_steps_2 = int(1e9) - batch_steps_1 - batch_steps_0 - - return TrainingPipeline( - save_interval=5_000_000, - metric_accumulate_interval=10_000, - optimizer_builder=Builder(optim.Adam, dict(lr=0.0003)), - num_mini_batch=1, - update_repeats=4, - max_grad_norm=0.5, - num_steps=128, - named_losses={"ppo_loss": PPO(**PPOConfig)}, - gamma=0.99, - use_gae=True, - gae_lambda=0.95, - advance_scene_rollout_period=self.ADVANCE_SCENE_ROLLOUT_PERIOD, - pipeline_stages=[ - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=batch_steps_0, - training_settings=TrainingSettings( - num_steps=32, metric_accumulate_interval=log_interval_small - ), - ), - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=batch_steps_1, - training_settings=TrainingSettings( - num_steps=64, metric_accumulate_interval=log_interval_medium - ), - ), - PipelineStage( - loss_names=["ppo_loss"], - max_stage_steps=batch_steps_2, - training_settings=TrainingSettings( - num_steps=128, metric_accumulate_interval=log_interval_large - ), - ), - ], + return out + + def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: + if self.TEST_ON_VALIDATION: + return self.valid_task_sampler_args(**kwargs) + + test_houses = self.HOUSE_DATASET["test"] + out = 
self._get_sampler_args_for_scene_split( + houses=test_houses.select(range(100)), + mode="eval", + allow_oversample=False, + max_tasks=10, + allow_flipping=False, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, + # extra_controller_args=dict(scene="Procedural"), + **kwargs, ) + return out + diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 5d3ea82..f84fcd7 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -58,42 +58,34 @@ class ProcTHORObjectNavClipResnet50RGBOnly( ), ] - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 50 + TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = ObjectNavTask + TASK_TYPE = StretchObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - VISUALIZE = False - - NUM_PROCESSES = 30 - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(10)] # 9999 is all of train - # if platform.system() == "Darwin": - # TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - random.shuffle(TRAIN_SCENES) + NUM_PROCESSES = 40 + # NUM_TRAIN_HOUSES = 40 # set None or comment out for all + MAX_STEPS = 500 + if platform.system() == "Darwin": + MAX_STEPS = 10 + NUM_TRAIN_HOUSES = 40 + VISUALIZE = False def __init__(self): super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - # self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) + self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) + self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' + self.ENV_ARGS['renderInstanceSegmentation'] = False + self.ENV_ARGS['renderDepthImage'] = False self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True + self.ENV_ARGS['allow_flipping'] = False @classmethod diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 07e6319..cec1dc4 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -1,4 +1,8 @@ import glob +from optparse import Option +from attrs import define +from typing import Any, Dict, List, Optional, Sequence +from typing_extensions import Literal import platform import pickle import random @@ -34,19 +38,20 @@ class ProcTHORObjectNavTaskSampler(TaskSampler): def __init__( self, - scenes: List[str], sensors: List[Sensor], max_steps: int, env_args: Dict[str, Any], - action_space: gym.Space, rewards_config: Dict, - objects: List[str], task_type: type, - scene_period: Optional[Union[int, str]] = None, + houses: datasets.Dataset, + house_inds: List[int], + target_object_types: List[str], + resample_same_scene_freq: int, + distance_type: str, 
max_tasks: Optional[int] = None, seed: Optional[int] = None, deterministic_cudnn: bool = False, - fixed_tasks: Optional[List[Dict[str, Any]]] = None, + allow_flipping: bool = False, visualizers: List[LoggerVisualizer] = [], *args, **kwargs @@ -56,85 +61,55 @@ def __init__( self.environment_type = env_args['environment_type'] del env_args['environment_type'] self.env_args = env_args - self.scenes = scenes + self.grid_size = 0.25 self.env: Optional[StretchManipulaTHOREnvironment] = None self.sensors = sensors self.max_steps = max_steps - self._action_space = action_space - self.objects = objects - - self.scene_counter: Optional[int] = None - self.scene_order: Optional[List[str]] = None - self.scene_id: Optional[int] = None - self.scene_period: Optional[ - Union[str, int] - ] = scene_period # default makes a random choice - + self._action_space = gym.spaces.Discrete(len(self.TASK_TYPE.class_action_names())) + self.resample_same_scene_freq = resample_same_scene_freq self._last_sampled_task: Optional[Task] = None - + self.seed: Optional[int] = None - self.set_seed(seed) + if seed is not None: + self.set_seed(seed) if deterministic_cudnn: set_deterministic_cudnn() self.visualizers = visualizers - self.sampler_mode = kwargs["sampler_mode"] + self.mode = kwargs["mode"] self.cap_training = kwargs["cap_training"] - # if self.env_args['scene'] == 'Procedural': - self.house_dataset = datasets.load_dataset("allenai/houses", use_auth_token=True) - - RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = ( - -1 - ) # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None` - # RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 100 - # if platform.system() == "Darwin": - # RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 - - RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 50 - self.resample_same_scene_freq = -1 #RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN - if platform.system() == "Darwin": - self.resample_same_scene_freq = 1#TODO NOW WHAT SHOULD WE DO? 
kiana for now - # assert self.resample_same_scene_freq == 1 # IMPORTANT IT WON"T WORK FOR 100 self.episode_index = 0 + self.houses = houses + self.house_inds = house_inds self.house_inds_index = 0 - self.reachable_positions_map = {} - self.house_dataset = self.house_dataset['train'] #TODO separately for test and val - # self.house_dataset = self.house_dataset.select(range(10)) - # ForkedPdb().set_trace() - - # ROOMS_TO_USE = [int(scene.replace('ProcTHOR', '')) for scene in self.scenes] - # ROOMS_TO_USE = [x for x in range(self.house_dataset.num_rows)] - ROOMS_TO_USE = [int(scene.replace('ProcTHOR', '')) for scene in self.scenes] - - # self.args_house_inds = [x for x in range(self.house_dataset.num_rows)] - self.args_house_inds = ROOMS_TO_USE self.valid_rotations = [0,90,180,270] - self.distance_type = "l2" + self.distance_type = distance_type self.distance_cache = DynamicDistanceCache(rounding=1) - self.target_object_types_set = set(self.objects) + self.target_object_types_set = set(target_object_types) self.obj_type_counter = Counter( - {obj_type: 0 for obj_type in self.objects} + {obj_type: 0 for obj_type in target_object_types} ) self.reachable_positions_map: Dict[int, Vector3] = dict() self.objects_in_scene_map: Dict[str, List[str]] = dict() self.visible_objects_cache = dict() - self.max_tasks = max_tasks if max_tasks is not None else np.Inf # stop when I tell you to stop + self.max_tasks = max_tasks #if max_tasks is not None else np.Inf # stop when I tell you to stop self.reset_tasks = self.max_tasks - self.max_vis_points=6 - self.max_agent_positions=6 + + self.max_vis_points = 6 + self.max_agent_positions = 6 + self.p_greedy_target_object = 0.8 + self.reset() def set_seed(self, seed: int): - self.seed = seed - if seed is not None: - set_seed(seed) + set_seed(seed) def _create_environment(self, **kwargs) -> StretchManipulaTHOREnvironment: env = self.environment_type( @@ -165,8 +140,8 @@ def last_sampled_task(self) -> Optional[StretchObjectNavTask]: return self._last_sampled_task def close(self) -> None: - if self.env.controller is not None: - self.env.controller.stop() + if self.env is not None: + self.env.stop() @property def all_observation_spaces_equal(self) -> bool: @@ -194,7 +169,7 @@ def get_nearest_agent_height(self, y_coordinate: float) -> float: @property def house_index(self) -> int: - return self.args_house_inds[self.house_inds_index] + return self.house_inds[self.house_inds_index] def is_object_visible(self, object_id: str) -> bool: """Return True if object_id is visible without any interaction in the scene. @@ -267,7 +242,7 @@ def sample_target_object_ids(self) -> Tuple[str, List[str]]: Objects returned will all be of the same objectType. Only considers visible objects in the house. 
""" - if random.random() < 0.8: # p_greedy_target_object + if random.random() < self.p_greedy_target_object: for obj_type, count in reversed(self.obj_type_counter.most_common()): instances_of_type = self.target_objects_in_scene.get(obj_type, []) @@ -321,7 +296,7 @@ def increment_scene(self) -> bool: # self.env.controller.step(action="DestroyHouse", raise_for_failure=True) self.env.controller.reset() self.env.list_of_actions_so_far = [] - self.house_entry = self.house_dataset[self.house_index] + self.house_entry = self.houses[self.house_index] self.house = pickle.loads(self.house_entry["house"]) if platform.system() == "Darwin": #TODO remove @@ -336,7 +311,7 @@ def increment_scene(self) -> bool: pose = self.house["metadata"]["agent"].copy() event = self.env.controller.step(action="TeleportFull", **pose) if not event: - # get_logger().warning(f"Initial teleport failing in {self.house_index}.") # clear logger noise + get_logger().warning(f"Initial teleport failing in {self.house_index}.") # clear logger noise return False #TODO this can mess FPS self.env.controller.step(action="MakeAllObjectsMoveable") self.env.controller.step(action="MakeObjectsStaticKinematicMassThreshold") @@ -353,10 +328,11 @@ def increment_scene(self) -> bool: return False reachable_positions = rp_event.metadata["actionReturn"] self.reachable_positions_map[self.house_index] = reachable_positions + return True def increment_scene_index(self): - self.house_inds_index = (self.house_inds_index + 1) % len(self.args_house_inds) + self.house_inds_index = (self.house_inds_index + 1) % len(self.house_inds) def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]: if self.env is None: @@ -385,7 +361,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject while not self.increment_scene(): pass - if random.random() < 0.8: #TODO + if random.random() < self.env_args['p_randomize_material']: self.env.controller.step(action="RandomizeMaterials", raise_for_failure=True) else: self.env.controller.step(action="ResetMaterials", raise_for_failure=True) @@ -397,21 +373,25 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject ) # NOTE: Set agent pose - starting_pose = AgentPose( - position=random.choice(self.reachable_positions), - rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), - horizon=0, - standing=True, - ) - event = self.env.controller.step(action="TeleportFull", **starting_pose) - # if not event: - # get_logger().warning( - # f"Teleport failing in {self.house_index} at {starting_pose}" - # ) + event = None + attempts = 0 + while not event: + attempts+=1 + starting_pose = AgentPose( + position=random.choice(self.reachable_positions), + rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), + horizon=0, + standing=True, + ) + event = self.env.controller.step(action="TeleportFull", **starting_pose) + if attempts > 10: + get_logger().error(f"Teleport failed {attempts-1} times in house {self.house_index} - something may be wrong") + self.episode_index += 1 self.max_tasks -= 1 - + + # ForkedPdb().set_trace() self._last_sampled_task = self.TASK_TYPE( env=self.env, sensors=self.sensors, diff --git a/utils/procthor_utils/procthor_object_nav_tasks.py b/utils/procthor_utils/procthor_object_nav_tasks.py index e2e334b..7074cb5 100644 --- a/utils/procthor_utils/procthor_object_nav_tasks.py +++ b/utils/procthor_utils/procthor_object_nav_tasks.py @@ -1,3 +1,4 @@ +from ast import For import random from collections import Counter 
from typing import Any, Dict, List, Optional, Tuple, Union @@ -103,6 +104,7 @@ def __init__( ) self.observations = [self.env.last_event.frame] self._metrics = None + ForkedPdb().set_trace() def min_l2_distance_to_target(self) -> float: """Return the minimum distance to a target object. @@ -235,7 +237,7 @@ def shaping(self) -> float: max_reward_mag = position_dist(p0, p1, ignore_y=True) if ( - self.reward_config.positive_only_reward + self.reward_config['positive_only_reward'] and cur_distance is not None and cur_distance > 0.5 ): @@ -251,7 +253,7 @@ def shaping(self) -> float: min(reward, max_reward_mag), -max_reward_mag, ) - * self.reward_config.shaping_weight + * self.reward_config['shaping_weight'] ) def get_observations(self, **kwargs) -> Any: @@ -311,6 +313,7 @@ def metrics(self) -> Dict[str, Any]: def _step(self, action: int) -> RLStepResult: action_str = self.class_action_names()[action] + ForkedPdb().set_trace() if self.mirror: if action_str == "RotateRight": diff --git a/utils/stretch_utils/stretch_visualizer.py b/utils/stretch_utils/stretch_visualizer.py index d87969d..d8c3cf4 100644 --- a/utils/stretch_utils/stretch_visualizer.py +++ b/utils/stretch_utils/stretch_visualizer.py @@ -229,22 +229,21 @@ def finish_episode(self, environment, episode_info, task_info): def log(self, environment, action_str, obs): image_intel = environment.intel_frame - depth_intel = environment.intel_depth try: - image_kinect = environment.kinect_frame - depth_kinect = environment.kinect_depth - combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1) - except: - if depth_intel is None or depth_kinect is None: - combined_frame = np.concatenate([image_intel, image_kinect],axis=1) - else: + depth_intel = environment.intel_depth + try: + image_kinect = environment.kinect_frame + depth_kinect = environment.kinect_depth + combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1) + except: combined_frame = np.concatenate([image_intel, depth_to_rgb(depth_intel)],axis=1) + except: + combined_frame = np.concatenate([image_intel],axis=1) # image_intel = intel_reshape(image_intel) # kinect_frame = kinect_reshape(kinect_frame) self.action_queue.append(action_str) - # combined_frame = np.concatenate([image_intel, image_kinect, depth_to_rgb(depth_intel), depth_to_rgb(depth_kinect)],axis=1) for o in obs: if ("rgb" in o) and isinstance(obs[o], np.ndarray): viz_obs = unnormalize_clip_image(obs[o])*255 From e73c25c7b7aa97caa92bd5471457f6c61f75063e Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 24 May 2022 13:11:24 -0700 Subject: [PATCH 13/53] refactor 2 --- .../experiments/bring_object_thor_base.py | 2 - .../experiments/ithor/obj_nav_for_procthor.py | 53 ++++++++++++------- ...nav_for_procthor_clip_resnet50_rgb_only.py | 4 +- .../procthor_object_nav_task_samplers.py | 4 +- .../procthor_object_nav_tasks.py | 1 - 5 files changed, 38 insertions(+), 26 deletions(-) diff --git a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py index 3ed619f..b310a12 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py @@ -129,8 +129,6 @@ def make_sampler_fn(cls, **kwargs) -> TaskSampler: viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS ] 
kwargs["visualizers"] = visualizers - - ForkedPdb().set_trace() kwargs["objects"] = cls.OBJECT_TYPES kwargs["task_type"] = cls.TASK_TYPE kwargs["exp_name"] = exp_name_w_time diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py index 364be57..82105ab 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py @@ -96,7 +96,7 @@ def make_sampler_fn( ] kwargs["visualizers"] = visualizers kwargs["task_type"] = cls.TASK_TYPE - kwargs["cap_training"] = cls.CAP_TRAINING + # kwargs["cap_training"] = cls.CAP_TRAINING kwargs["exp_name"] = exp_name_w_time return cls.TASK_SAMPLER(**kwargs) @@ -113,7 +113,6 @@ def _get_sampler_args_for_scene_split( mode: Literal["train", "eval"], resample_same_scene_freq: int, allow_oversample: bool, - allow_flipping: bool, process_ind: int, total_processes: int, max_tasks: Optional[int], @@ -128,7 +127,8 @@ def _get_sampler_args_for_scene_split( " You can avoid this by setting a number of workers divisible by the number of scenes" ) house_inds = list(range(len(houses))) - if total_processes > len(houses): + valid_house_inds = [h for h in house_inds if h not in PROCTHOR_INVALID_SCENES] + if total_processes > len(valid_house_inds): if not allow_oversample: raise RuntimeError( f"Cannot have `total_processes > len(houses)`" @@ -137,21 +137,22 @@ def _get_sampler_args_for_scene_split( if total_processes % len(houses) != 0: get_logger().warning(oversample_warning) - house_inds = house_inds * ceil(total_processes / len(houses)) - house_inds = house_inds[ - : total_processes * (len(house_inds) // total_processes) + valid_house_inds = valid_house_inds * ceil(total_processes / len(houses)) + valid_house_inds = valid_house_inds[ + : total_processes * (len(valid_house_inds) // total_processes) ] elif len(houses) % total_processes != 0: get_logger().warning(oversample_warning) - inds = self._partition_inds(len(house_inds), total_processes) - house_inds = house_inds[inds[process_ind] : inds[process_ind + 1]] - house_inds = [h for h in house_inds if h not in PROCTHOR_INVALID_SCENES] + inds = self._partition_inds(len(valid_house_inds), total_processes) + selected_house_inds = valid_house_inds[inds[process_ind] : inds[process_ind + 1]] + + x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None) return { - "mode": mode, + "sampler_mode": mode, "houses": houses, - "house_inds": house_inds, + "house_inds": selected_house_inds, "process_ind": process_ind, "env_args": self.ENV_ARGS, "max_steps": self.MAX_STEPS, @@ -161,19 +162,21 @@ def _get_sampler_args_for_scene_split( "rewards_config": self.REWARD_CONFIG, "target_object_types": self.OBJECT_TYPES, "max_tasks": max_tasks if max_tasks is not None else len(house_inds), - "allow_flipping": allow_flipping, "distance_type": self.DISTANCE_TYPE, "resample_same_scene_freq": resample_same_scene_freq, "scene_name": "Procedural" - } + }, x_display - def train_task_sampler_args(self, **kwargs) -> Dict[str, Any]: + def train_task_sampler_args( + self, + **kwargs + ) -> Dict[str, Any]: train_houses = self.HOUSE_DATASET["train"] if self.NUM_TRAIN_HOUSES: train_houses = train_houses.select(range(self.NUM_TRAIN_HOUSES)) - out = self._get_sampler_args_for_scene_split( + out,x_display = self._get_sampler_args_for_scene_split( houses=train_houses, mode="train", 
allow_oversample=True, @@ -183,20 +186,29 @@ def train_task_sampler_args(self, **kwargs) -> Dict[str, Any]: # extra_controller_args=dict(branch="nanna", scene="Procedural"), **kwargs, ) + out["cap_training"] = self.CAP_TRAINING + out["env_args"] = {} + out["env_args"].update(self.ENV_ARGS) + out["env_args"]["x_display"] = x_display + ForkedPdb().set_trace() return out def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: val_houses = self.HOUSE_DATASET["validation"] - out = self._get_sampler_args_for_scene_split( + out,x_display = self._get_sampler_args_for_scene_split( houses=val_houses.select(range(100)), mode="eval", allow_oversample=False, max_tasks=10, - allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # extra_controller_args=dict(scene="Procedural"), **kwargs, ) + out["cap_training"] = self.CAP_TRAINING + out["env_args"] = {} + out["env_args"].update(self.ENV_ARGS) + out["env_args"]["x_display"] = x_display + return out def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: @@ -204,16 +216,19 @@ def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: return self.valid_task_sampler_args(**kwargs) test_houses = self.HOUSE_DATASET["test"] - out = self._get_sampler_args_for_scene_split( + out,x_display = self._get_sampler_args_for_scene_split( houses=test_houses.select(range(100)), mode="eval", allow_oversample=False, max_tasks=10, - allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # extra_controller_args=dict(scene="Procedural"), **kwargs, ) + out["cap_training"] = self.CAP_TRAINING + out["env_args"] = {} + out["env_args"].update(self.ENV_ARGS) + out["env_args"]["x_display"] = x_display return out diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index f84fcd7..1a5dbea 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -67,12 +67,12 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 40 - # NUM_TRAIN_HOUSES = 40 # set None or comment out for all + NUM_TRAIN_HOUSES = 500 # set None or comment out for all MAX_STEPS = 500 if platform.system() == "Darwin": MAX_STEPS = 10 NUM_TRAIN_HOUSES = 40 - VISUALIZE = False + # VISUALIZE = False def __init__(self): super().__init__() diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index cec1dc4..49254b1 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -51,7 +51,6 @@ def __init__( max_tasks: Optional[int] = None, seed: Optional[int] = None, deterministic_cudnn: bool = False, - allow_flipping: bool = False, visualizers: List[LoggerVisualizer] = [], *args, **kwargs @@ -80,7 +79,7 @@ def __init__( self.visualizers = visualizers - self.mode = kwargs["mode"] + self.sampler_mode = kwargs["sampler_mode"] self.cap_training = kwargs["cap_training"] self.episode_index = 0 @@ -413,6 +412,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject "mirrored": self.env_args['allow_flipping'] and random.random() > 0.5, }, ) + # ForkedPdb().set_trace() return self._last_sampled_task def reset(self): 
diff --git a/utils/procthor_utils/procthor_object_nav_tasks.py b/utils/procthor_utils/procthor_object_nav_tasks.py index 7074cb5..bd217e3 100644 --- a/utils/procthor_utils/procthor_object_nav_tasks.py +++ b/utils/procthor_utils/procthor_object_nav_tasks.py @@ -104,7 +104,6 @@ def __init__( ) self.observations = [self.env.last_event.frame] self._metrics = None - ForkedPdb().set_trace() def min_l2_distance_to_target(self) -> float: """Return the minimum distance to a target object. From 67612ffc43ee17940733543e0ff686180d23e25f Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 24 May 2022 13:34:48 -0700 Subject: [PATCH 14/53] refactor 3 - complete 2camera --- .../ithor/obj_nav_2camera_procthor_wide.py | 11 ----------- .../experiments/ithor/obj_nav_for_procthor.py | 6 +++--- utils/procthor_utils/procthor_object_nav_tasks.py | 2 -- 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py index 5e3cfbf..66c4e29 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py @@ -155,17 +155,6 @@ def create_model(cls, **kwargs) -> nn.Module: ) resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - return ResnetTensorNavNCameraActorCritic( action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py index 82105ab..96b7a1b 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py @@ -181,7 +181,6 @@ def train_task_sampler_args( mode="train", allow_oversample=True, max_tasks=float("inf"), - allow_flipping=self.ENV_ARGS['allow_flipping'], resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN, # extra_controller_args=dict(branch="nanna", scene="Procedural"), **kwargs, @@ -190,7 +189,6 @@ def train_task_sampler_args( out["env_args"] = {} out["env_args"].update(self.ENV_ARGS) out["env_args"]["x_display"] = x_display - ForkedPdb().set_trace() return out def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: @@ -206,7 +204,8 @@ def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: ) out["cap_training"] = self.CAP_TRAINING out["env_args"] = {} - out["env_args"].update(self.ENV_ARGS) + out["env_args"].update(self.ENV_ARGS) + out["env_args"]['allow_flipping'] = False out["env_args"]["x_display"] = x_display return out @@ -228,6 +227,7 @@ def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: out["cap_training"] = self.CAP_TRAINING out["env_args"] = {} 
out["env_args"].update(self.ENV_ARGS) + out["env_args"]['allow_flipping'] = False out["env_args"]["x_display"] = x_display return out diff --git a/utils/procthor_utils/procthor_object_nav_tasks.py b/utils/procthor_utils/procthor_object_nav_tasks.py index bd217e3..5ca5b15 100644 --- a/utils/procthor_utils/procthor_object_nav_tasks.py +++ b/utils/procthor_utils/procthor_object_nav_tasks.py @@ -26,7 +26,6 @@ ROTATE_LEFT_SMALL ) from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment -# from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment from manipulathor_utils.debugger_util import ForkedPdb @@ -312,7 +311,6 @@ def metrics(self) -> Dict[str, Any]: def _step(self, action: int) -> RLStepResult: action_str = self.class_action_names()[action] - ForkedPdb().set_trace() if self.mirror: if action_str == "RotateRight": From ca86f82d9ea8fa28ef7ddc3542fc0023389382c1 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 24 May 2022 14:42:59 -0700 Subject: [PATCH 15/53] refactor 4 - cleanup and condense --- .../ithor/obj_nav_2camera_procthor_wide.py | 4 +--- .../experiments/ithor/obj_nav_for_procthor.py | 17 +++++++++++------ .../procthor_object_nav_task_samplers.py | 4 ---- .../procthor_utils/procthor_object_nav_tasks.py | 2 -- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py index 66c4e29..0481ec2 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py @@ -102,7 +102,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( NUM_PROCESSES = 50 NUM_PROCESSES = 40 - # NUM_TRAIN_HOUSES = 40 + NUM_TRAIN_HOUSES = 50 def __init__(self): @@ -112,10 +112,8 @@ def __init__(self): self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' self.ENV_ARGS['renderInstanceSegmentation'] = False self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID self.ENV_ARGS['allow_flipping'] = False diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py index 96b7a1b..6505489 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Optional, Sequence from typing_extensions import Literal, final +import platform import datasets import numpy as np @@ -67,6 +68,10 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = ( -1 ) # Should be > 0 if `ADVANCE_SCENE_ROLLOUT_PERIOD` is `None` + + if platform.system() == "Darwin": + RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = 1 + RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # TODO apparently this won't work with 100 (why?) 
HOUSE_DATASET = datasets.load_dataset( @@ -96,7 +101,6 @@ def make_sampler_fn( ] kwargs["visualizers"] = visualizers kwargs["task_type"] = cls.TASK_TYPE - # kwargs["cap_training"] = cls.CAP_TRAINING kwargs["exp_name"] = exp_name_w_time return cls.TASK_SAMPLER(**kwargs) @@ -119,7 +123,6 @@ def _get_sampler_args_for_scene_split( devices: Optional[List[int]] = None, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, - extra_controller_args: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: # NOTE: oversample some scenes -> bias oversample_warning = ( @@ -182,13 +185,14 @@ def train_task_sampler_args( allow_oversample=True, max_tasks=float("inf"), resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN, - # extra_controller_args=dict(branch="nanna", scene="Procedural"), **kwargs, ) out["cap_training"] = self.CAP_TRAINING out["env_args"] = {} out["env_args"].update(self.ENV_ARGS) out["env_args"]["x_display"] = x_display + out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID + out["env_args"]['scene'] = 'Procedural' return out def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: @@ -199,7 +203,6 @@ def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: allow_oversample=False, max_tasks=10, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, - # extra_controller_args=dict(scene="Procedural"), **kwargs, ) out["cap_training"] = self.CAP_TRAINING @@ -207,7 +210,8 @@ def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: out["env_args"].update(self.ENV_ARGS) out["env_args"]['allow_flipping'] = False out["env_args"]["x_display"] = x_display - + out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID + out["env_args"]['scene'] = 'Procedural' return out def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: @@ -221,7 +225,6 @@ def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: allow_oversample=False, max_tasks=10, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, - # extra_controller_args=dict(scene="Procedural"), **kwargs, ) out["cap_training"] = self.CAP_TRAINING @@ -229,6 +232,8 @@ def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: out["env_args"].update(self.ENV_ARGS) out["env_args"]['allow_flipping'] = False out["env_args"]["x_display"] = x_display + out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID + out["env_args"]['scene'] = 'Procedural' return out diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 49254b1..fd67c9b 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -14,7 +14,6 @@ import datasets import numpy as np from ai2thor.controller import Controller -# from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor from allenact.base_abstractions.task import Task, TaskSampler from allenact.utils.cache_utils import DynamicDistanceCache @@ -22,7 +21,6 @@ from allenact.utils.system import get_logger from ithor_arm.ithor_arm_viz import LoggerVisualizer -# from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment from utils.procthor_utils.procthor_types import AgentPose, Vector3 from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask @@ -390,7 +388,6 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject self.episode_index += 1 self.max_tasks -= 1 - # ForkedPdb().set_trace() self._last_sampled_task 
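        # Annotation (not a diff line): x_display is computed per process in
        # _get_sampler_args_for_scene_split above, round-robin over the available GPU devices,
        # e.g. with devices = [0, 1]:
        #     ["0.%d" % devices[i % len(devices)] for i in range(4)]  # -> ['0.0', '0.1', '0.0', '0.1']
        # and falls back to None (presumably the default X display) when no devices are given.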
= self.TASK_TYPE( env=self.env, sensors=self.sensors, @@ -412,7 +409,6 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject "mirrored": self.env_args['allow_flipping'] and random.random() > 0.5, }, ) - # ForkedPdb().set_trace() return self._last_sampled_task def reset(self): diff --git a/utils/procthor_utils/procthor_object_nav_tasks.py b/utils/procthor_utils/procthor_object_nav_tasks.py index 5ca5b15..33b8b83 100644 --- a/utils/procthor_utils/procthor_object_nav_tasks.py +++ b/utils/procthor_utils/procthor_object_nav_tasks.py @@ -1,6 +1,4 @@ -from ast import For import random -from collections import Counter from typing import Any, Dict, List, Optional, Tuple, Union from typing_extensions import Literal From 1430f219539b1385ece4d5a34b6d4477aa815422 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 24 May 2022 15:34:41 -0700 Subject: [PATCH 16/53] refactor 5 - update and test other experiment configs (narrow, robothor, ithor) --- .../ithor/obj_nav_2camera_ithor_wide.py | 35 ++------- .../ithor/obj_nav_2camera_procthor_narrow.py | 34 ++------- .../ithor/obj_nav_2camera_procthor_wide.py | 2 +- .../ithor/obj_nav_2camera_robothor_narrow.py | 36 ++-------- ...nav_for_procthor_clip_resnet50_rgb_only.py | 72 ++++++------------- .../all_rooms_object_nav_task_sampler.py | 1 + 6 files changed, 43 insertions(+), 137 deletions(-) diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index 1222f03..574af82 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -16,10 +16,10 @@ from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig +from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_base_config import ObjectNavBaseConfig from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS +from utils.stretch_utils.stretch_constants import STRETCH_MANIPULATHOR_COMMIT_ID, STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor @@ -29,14 +29,10 @@ from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer from ithor_arm.ithor_arm_viz import TestMetricLogger -from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL -from ithor_arm.ithor_arm_constants import TRAIN_OBJECTS, TEST_OBJECTS -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( - ProcTHORObjectNavBaseConfig + ObjectNavBaseConfig ): """An Object Navigation experiment configuration in iThor with RGB input.""" @@ -54,16 +50,12 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( for i in range(26, 31) ] - ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES # OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS)) with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: 
OBJECT_TYPES=yaml.safe_load(f) - # UNSEEN_OBJECT_TYPES = tuple(sorted(TEST_OBJECTS)) - - NOISE_LEVEL = 0 distance_thr = 1.0 # match procthor config mean = np.array([0.48145466, 0.4578275, 0.40821073]) @@ -104,19 +96,12 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( def __init__(self): super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - # self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True + self.ENV_ARGS['renderInstanceSegmentation'] = False + self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID @classmethod @@ -163,16 +148,6 @@ def create_model(cls, **kwargs) -> nn.Module: goal_dims=32, add_prev_actions=True, ) - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) @classmethod def tag(cls): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py index f097c55..b71e344 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py @@ -77,31 +77,20 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - NUM_PROCESSES = 56 - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train - if platform.system() == "Darwin": - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(100)] - random.shuffle(TRAIN_SCENES) + NUM_PROCESSES = 40 + # NUM_TRAIN_HOUSES = 50 def __init__(self): super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) + self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True + self.ENV_ARGS['renderInstanceSegmentation'] = False + self.ENV_ARGS['renderDepthImage'] = False + self.ENV_ARGS['allow_flipping'] = False @classmethod @@ -149,17 +138,6 @@ def create_model(cls, **kwargs) -> nn.Module: goal_dims=32, 
add_prev_actions=True, ) - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - @classmethod def tag(cls): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py index 0481ec2..8638de3 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py @@ -102,7 +102,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( NUM_PROCESSES = 50 NUM_PROCESSES = 40 - NUM_TRAIN_HOUSES = 50 + # NUM_TRAIN_HOUSES = 50 #TODO: house > num_proc oversampling not implemented yet def __init__(self): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py index 4ee0dd3..c9343e0 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py @@ -9,17 +9,16 @@ import yaml import copy -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig +from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_base_config import ObjectNavBaseConfig from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS +from utils.stretch_utils.stretch_constants import STRETCH_MANIPULATHOR_COMMIT_ID, STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor @@ -28,14 +27,13 @@ from allenact.utils.experiment_utils import Builder from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer from ithor_arm.ithor_arm_viz import TestMetricLogger -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( - ProcTHORObjectNavBaseConfig + ObjectNavBaseConfig ): """An Object Navigation experiment configuration in iThor with RGB input.""" @@ -86,28 +84,18 @@ class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( TASK_TYPE = StretchObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment 
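    # Class-level bindings: this RoboTHOR narrow-FOV config runs the Stretch ObjectNav
    # task with the all-rooms task sampler inside the Stretch ManipulaTHOR environment.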
POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - VISUALIZE = False - - - NUM_PROCESSES = 56 + NUM_PROCESSES = 40 def __init__(self): super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - # self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True + self.ENV_ARGS['renderInstanceSegmentation'] = False + self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID @classmethod @@ -153,17 +141,7 @@ def create_model(cls, **kwargs) -> nn.Module: hidden_size=512, goal_dims=32, add_prev_actions=True, - ) - # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_clip_resnet", - # depth_resnet_preprocessor_uuid="rgb_clip_resnet_arm", # a convenient lie - can't use with a depth sensor too - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) + ) @classmethod def tag(cls): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 1a5dbea..bec26ac 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -58,7 +58,9 @@ class ProcTHORObjectNavClipResnet50RGBOnly( ), ] - + MAX_STEPS = 500 + if platform.system() == "Darwin": + MAX_STEPS = 100 TASK_SAMPLER = ProcTHORObjectNavTaskSampler TASK_TYPE = StretchObjectNavTask @@ -67,12 +69,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 40 - NUM_TRAIN_HOUSES = 500 # set None or comment out for all - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 10 - NUM_TRAIN_HOUSES = 40 - # VISUALIZE = False + # NUM_TRAIN_HOUSES = 50 def __init__(self): super().__init__() @@ -81,10 +78,8 @@ def __init__(self): self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' self.ENV_ARGS['renderInstanceSegmentation'] = False - self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID + self.ENV_ARGS['renderDepthImage'] = False self.ENV_ARGS['allow_flipping'] = False @@ -92,60 +87,39 @@ def __init__(self): @final def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: preprocessors = [] - rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - if rgb_sensor is not None: - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid=rgb_sensor.uuid, - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) + # rgb_sensor = 
next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) + + preprocessors.append( + ClipResNetPreprocessor( + rgb_input_uuid="rgb_lowres", + clip_model_type="RN50", + pool=False, + output_uuid="rgb_clip_resnet", + visualize=cls.VISUALIZE ) - + ) return preprocessors - # @classmethod - # @final - # def create_model(cls, **kwargs) -> nn.Module: - # goal_sensor_uuid = next( - # (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - # None, - # ) - # resnet_preprocessor_uuids = ["rgb_clip_resnet"] - - # return ResnetTensorNavNCameraActorCritic( - # action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # resnet_preprocessor_uuids=resnet_preprocessor_uuids, - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) + @classmethod + @final def create_model(cls, **kwargs) -> nn.Module: - has_rgb = any(isinstance(s, RGBSensorThor) for s in cls.SENSORS) - has_depth = False #any(isinstance(s, DepthSensor) for s in cls.SENSORS) - goal_sensor_uuid = next( (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), None, ) + resnet_preprocessor_uuids = ["rgb_clip_resnet"] - return ResnetTensorNavActorCritic( - action_space=gym.spaces.Discrete(len(ObjectNavTask.class_action_names())), + return ResnetTensorNavNCameraActorCritic( + action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid=goal_sensor_uuid, - rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None, - depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, + resnet_preprocessor_uuids=resnet_preprocessor_uuids, hidden_size=512, goal_dims=32, - add_prev_actions=True #cfg.model.add_prev_actions_embedding, + add_prev_actions=True, ) @classmethod def tag(cls): - return cls.__name__ + return cls.__name__ diff --git a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py b/utils/procthor_utils/all_rooms_object_nav_task_sampler.py index 76dab88..1aaae8f 100644 --- a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/procthor_utils/all_rooms_object_nav_task_sampler.py @@ -340,6 +340,7 @@ def get_full_object_info(env, object_type): reward_config=self.rewards_config, distance_type=self.distance_type, distance_cache=self.distance_cache, + additional_visualize=False # not implemented for non-procthor ) return self._last_sampled_task From aab3d725bacccd3739570b8afd5b32e729fde3bf Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 24 May 2022 15:56:02 -0700 Subject: [PATCH 17/53] incorporate kiana changs to viz --- .../ithor/obj_nav_2camera_ithor_wide.py | 18 ++++++++++++++++++ .../ithor/obj_nav_2camera_procthor_narrow.py | 19 +++++++++++++++++++ .../ithor/obj_nav_2camera_procthor_wide.py | 8 +++++--- .../ithor/obj_nav_2camera_robothor_narrow.py | 18 ++++++++++++++++++ .../models/clip_objnav_ncamera_model.py | 8 +++++++- .../models/clip_preprocessors.py | 2 +- 6 files changed, 68 insertions(+), 5 deletions(-) diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index 574af82..0da05c3 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ 
b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -85,6 +85,24 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( MAX_STEPS = 500 if platform.system() == "Darwin": MAX_STEPS = 100 + SENSORS += [ + RGBSensorStretchKinectBigFov( + height=224, + width=224, + use_resnet_normalization=True, + mean=mean, + stdev=stdev, + uuid="rgb_lowres_arm_only_viz", + ), + RGBSensorThor( + height=224, + width=224, + use_resnet_normalization=True, + mean=mean, + stdev=stdev, + uuid="rgb_lowres_only_viz", + ), + ] TASK_SAMPLER = AllRoomsObjectNavTaskSampler TASK_TYPE = StretchObjectNavTask diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py index b71e344..9afd758 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py @@ -70,6 +70,25 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( MAX_STEPS = 500 if platform.system() == "Darwin": MAX_STEPS = 100 + SENSORS += [ + RGBSensorStretchKinect( + height=224, + width=224, + use_resnet_normalization=True, + mean=mean, + stdev=stdev, + uuid="rgb_lowres_arm_only_viz", + ), + RGBSensorStretchIntel( + height=224, + width=224, + use_resnet_normalization=True, + mean=mean, + stdev=stdev, + uuid="rgb_lowres_only_viz", + ), + ] + TASK_SAMPLER = ProcTHORObjectNavTaskSampler TASK_TYPE = StretchObjectNavTask diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py index 8638de3..76c5ce9 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py @@ -73,8 +73,12 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( ] MAX_STEPS = 500 + NUM_PROCESSES = 40 + # NUM_TRAIN_HOUSES = 50 #TODO: house > num_proc oversampling not implemented yet + if platform.system() == "Darwin": - MAX_STEPS = 10 + MAX_STEPS = 100 + NUM_TRAIN_HOUSES = 100 SENSORS += [ RGBSensorStretchKinectBigFov( height=224, @@ -101,8 +105,6 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( NUM_PROCESSES = 50 - NUM_PROCESSES = 40 - # NUM_TRAIN_HOUSES = 50 #TODO: house > num_proc oversampling not implemented yet def __init__(self): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py index c9343e0..7ee002b 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py @@ -79,6 +79,24 @@ class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( MAX_STEPS = 500 if platform.system() == "Darwin": MAX_STEPS = 500 + SENSORS += [ + RGBSensorStretchKinect( + height=224, + width=224, + use_resnet_normalization=True, + mean=mean, + stdev=stdev, + uuid="rgb_lowres_arm_only_viz", + ), + RGBSensorStretchIntel( + height=224, + width=224, + use_resnet_normalization=True, + mean=mean, + stdev=stdev, + uuid="rgb_lowres_only_viz", + ), + ] TASK_SAMPLER = AllRoomsObjectNavTaskSampler 
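    # The Darwin-only sensors added above duplicate each camera under a "*_only_viz" uuid;
    # presumably they exist so the image visualizers can render raw frames during local
    # debugging, while the CLIP preprocessors keep consuming only "rgb_lowres" and
    # "rgb_lowres_arm".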
TASK_TYPE = StretchObjectNavTask diff --git a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py b/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py index 4295c91..c905e57 100644 --- a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py +++ b/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py @@ -6,6 +6,8 @@ import platform from datetime import datetime from typing import Tuple, Optional, List, Dict, cast +import platform +from datetime import datetime import gym import torch @@ -21,7 +23,6 @@ from manipulathor_utils.debugger_util import ForkedPdb from utils.hacky_viz_utils import hacky_visualization - class ResnetTensorNavNCameraActorCritic(VisualNavActorCritic): def __init__( # base params @@ -95,6 +96,11 @@ def forward(self, **kwargs): #TODO NOW remove hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) + def forward(self, **kwargs): #TODO NOW remove + if platform.system() == "Darwin": + hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) + return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) + class ResnetNCameraTensorGoalEncoder(nn.Module): def __init__( diff --git a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py b/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py index d7bcfc2..b676ab3 100644 --- a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py +++ b/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py @@ -10,7 +10,7 @@ from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.misc_utils import prepare_locals_for_super from manipulathor_utils.debugger_util import ForkedPdb -from utils.hacky_viz_utils import hacky_visualization +# from utils.hacky_viz_utils import hacky_visualization from datetime import datetime From 26b5eb32e13663c151c163b3b996925d0ebc52e0 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 25 May 2022 13:24:15 -0700 Subject: [PATCH 18/53] fix cherry-pick --- .../ithor/obj_nav_2camera_ithor_wide.py | 100 ++++----- .../ithor/obj_nav_2camera_procthor_narrow.py | 147 ++----------- .../ithor/obj_nav_2camera_procthor_wide.py | 134 ++---------- .../ithor/obj_nav_2camera_robothor_narrow.py | 132 ++---------- .../experiments/ithor/obj_nav_base_config.py | 5 + .../experiments/ithor/obj_nav_for_procthor.py | 100 +++------ ...nav_for_procthor_clip_resnet50_rgb_only.py | 87 +++----- .../ithor/obj_nav_for_procthor_rgb_only.py | 113 ---------- .../ithor/obj_nav_for_robothor_rgb_only.py | 153 -------------- .../models/clip_objnav_ncamera_model.py | 4 +- .../clip_resnet_ncamera_preprocess_mixin.py | 81 +++++++ .../models/objnav_model_rgb_only.py | 199 ------------------ 12 files changed, 254 insertions(+), 1001 deletions(-) delete mode 100644 manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py delete mode 100644 manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py create mode 100644 manipulathor_baselines/procthor_baselines/models/clip_resnet_ncamera_preprocess_mixin.py delete mode 100644 manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py 
b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index 0da05c3..75b70ef 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -22,8 +22,10 @@ from utils.stretch_utils.stretch_constants import STRETCH_MANIPULATHOR_COMMIT_ID, STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic +# from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor +from manipulathor_baselines.procthor_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ + ClipResNetPreprocessNCameraGRUActorCriticMixin +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.experiment_utils import Builder from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer @@ -62,19 +64,19 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( stdev = np.array([0.26862954, 0.26130258, 0.27577711]) SENSORS = [ RGBSensorThor( - height=224, - width=224, + height=ObjectNavBaseConfig.SCREEN_SIZE, + width=ObjectNavBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres", ), RGBSensorStretchKinectBigFov( - height=224, - width=224, + height=ObjectNavBaseConfig.SCREEN_SIZE, + width=ObjectNavBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm", ), GoalObjectTypeThorSensor( @@ -87,19 +89,19 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( MAX_STEPS = 100 SENSORS += [ RGBSensorStretchKinectBigFov( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, + height=ObjectNavBaseConfig.SCREEN_SIZE, + width=ObjectNavBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm_only_viz", ), RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, + height=ObjectNavBaseConfig.SCREEN_SIZE, + width=ObjectNavBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_only_viz", ), ] @@ -109,8 +111,9 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - NUM_PROCESSES = 40 + CLIP_MODEL_TYPE = "RN50" + def __init__(self): super().__init__() @@ -120,52 +123,23 @@ def __init__(self): self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['renderInstanceSegmentation'] = False self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID + + self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( + sensors=self.SENSORS, + clip_model_type=self.CLIP_MODEL_TYPE, + screen_size=self.SCREEN_SIZE, 
+ ) + def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return self.preprocessing_and_model.preprocessors() - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) + + def create_model(self, **kwargs) -> nn.Module: + return self.preprocessing_and_model.create_model( + num_actions=len(self.TASK_TYPE.class_action_names()), **kwargs, + visualize=self.VISUALIZE ) - return preprocessors - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) @classmethod def tag(cls): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py index 9afd758..57cb9e6 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py @@ -1,39 +1,15 @@ import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn import yaml -import copy -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger -from 
allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - +from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor_clip_resnet50_rgb_only import ProcTHORObjectNavClipResnet50RGBOnly +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( - ProcTHORObjectNavBaseConfig + ProcTHORObjectNavClipResnet50RGBOnly ): """An Object Navigation experiment configuration in iThor with RGB input.""" @@ -41,25 +17,21 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) SENSORS = [ RGBSensorStretchIntel( - height=224, - width=224, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres", ), RGBSensorStretchKinect( - height=224, - width=224, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm", ), GoalObjectTypeThorSensor( @@ -67,99 +39,22 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( ), ] - MAX_STEPS = 500 if platform.system() == "Darwin": - MAX_STEPS = 100 SENSORS += [ RGBSensorStretchKinect( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm_only_viz", ), RGBSensorStretchIntel( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_only_viz", ), ] - - - TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - - NUM_PROCESSES = 40 - # NUM_TRAIN_HOUSES = 50 - - - def __init__(self): - super().__init__() - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['p_randomize_material'] = 0.8 - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['renderInstanceSegmentation'] = False - self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['allow_flipping'] = False - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", 
- clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) - ) - return preprocessors - - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - - @classmethod - def tag(cls): - return cls.__name__ - - diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py index 76c5ce9..6cb797b 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py @@ -1,13 +1,5 @@ import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn import yaml -import copy from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor @@ -15,27 +7,16 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler +from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor_clip_resnet50_rgb_only import ProcTHORObjectNavClipResnet50RGBOnly from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( - ProcTHORObjectNavBaseConfig + ProcTHORObjectNavClipResnet50RGBOnly ): """An Object Navigation experiment configuration in iThor with RGB input.""" @@ -43,27 +24,22 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( with 
open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - #TODO use resnet clip normalizations? SENSORS = [ RGBSensorThor( - height=224, - width=224, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres", ), RGBSensorStretchKinectBigFov( - height=224, - width=224, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm", ), @@ -72,98 +48,30 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( ), ] - MAX_STEPS = 500 - NUM_PROCESSES = 40 - # NUM_TRAIN_HOUSES = 50 #TODO: house > num_proc oversampling not implemented yet + # NUM_PROCESSES = 40 + # NUM_TRAIN_HOUSES = None if platform.system() == "Darwin": - MAX_STEPS = 100 - NUM_TRAIN_HOUSES = 100 SENSORS += [ RGBSensorStretchKinectBigFov( - height=224, - width=224, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm_only_viz", ), RGBSensorThor( - height=224, - width=224, + height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_only_viz", ), ] - TASK_SAMPLER = ProcTHORObjectNavTaskSampler TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - NUM_PROCESSES = 50 - - - - def __init__(self): - super().__init__() - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['p_randomize_material'] = 0.8 - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['renderInstanceSegmentation'] = False - self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['allow_flipping'] = False - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) - ) - return preprocessors - - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - 
resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) @classmethod def tag(cls): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py index 7ee002b..31513cd 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py @@ -1,39 +1,20 @@ import platform import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn import yaml -import copy from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_base_config import ObjectNavBaseConfig -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import STRETCH_MANIPULATHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger +from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( - ObjectNavBaseConfig + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV ): """An Object Navigation experiment configuration in iThor with RGB input.""" @@ -46,29 +27,24 @@ class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) - random.shuffle(TRAIN_SCENES) random.shuffle(TEST_SCENES) - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) SENSORS = [ RGBSensorStretchIntel( - height=224, - width=224, + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + 
mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres", ), RGBSensorStretchKinect( - height=224, - width=224, + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm", ), GoalObjectTypeThorSensor( @@ -76,93 +52,25 @@ class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( ), ] - MAX_STEPS = 500 if platform.system() == "Darwin": - MAX_STEPS = 500 SENSORS += [ RGBSensorStretchKinect( - height=224, - width=224, + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm_only_viz", ), RGBSensorStretchIntel( - height=224, - width=224, + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_only_viz", ), ] - TASK_SAMPLER = AllRoomsObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - NUM_PROCESSES = 40 - - - def __init__(self): - super().__init__() - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['renderInstanceSegmentation'] = False - self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres_arm", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet_arm", - visualize=cls.VISUALIZE - ) - ) - return preprocessors - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet","rgb_clip_resnet_arm"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - - @classmethod - def tag(cls): - return cls.__name__ diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py index e59a3b6..ff32853 100644 --- 
a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py @@ -17,6 +17,9 @@ from allenact.base_abstractions.sensor import Sensor +from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer +from ithor_arm.ithor_arm_viz import TestMetricLogger + from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID @@ -46,6 +49,8 @@ class ObjectNavBaseConfig(BringObjectThorBaseConfig): SENSORS: Sequence[Sensor] = [] + POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] + DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" def __init__(self): diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py index 6505489..bb36420 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py @@ -1,3 +1,4 @@ +from ast import For from typing import Any, Dict, List, Optional, Sequence from typing_extensions import Literal, final import platform @@ -74,6 +75,8 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # TODO apparently this won't work with 100 (why?) + TEST_ON_VALIDATION = False + HOUSE_DATASET = datasets.load_dataset( "allenai/houses", use_auth_token=True, ignore_verifications=True ) @@ -81,7 +84,6 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): MAX_STEPS = 500 NUM_TRAIN_HOUSES = None # none means all - @classmethod def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: return tuple() @@ -100,7 +102,6 @@ def make_sampler_fn( viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS ] kwargs["visualizers"] = visualizers - kwargs["task_type"] = cls.TASK_TYPE kwargs["exp_name"] = exp_name_w_time return cls.TASK_SAMPLER(**kwargs) @@ -116,60 +117,46 @@ def _get_sampler_args_for_scene_split( houses: datasets.Dataset, mode: Literal["train", "eval"], resample_same_scene_freq: int, - allow_oversample: bool, process_ind: int, - total_processes: int, max_tasks: Optional[int], + allow_flipping: Optional[bool] = None, devices: Optional[List[int]] = None, - seeds: Optional[List[int]] = None, - deterministic_cudnn: bool = False, + **kwargs, ) -> Dict[str, Any]: - # NOTE: oversample some scenes -> bias - oversample_warning = ( - f"Warning: oversampling some of the houses ({houses}) to feed all processes ({total_processes})." - " You can avoid this by setting a number of workers divisible by the number of scenes" - ) - house_inds = list(range(len(houses))) - valid_house_inds = [h for h in house_inds if h not in PROCTHOR_INVALID_SCENES] - if total_processes > len(valid_house_inds): - if not allow_oversample: - raise RuntimeError( - f"Cannot have `total_processes > len(houses)`" - f" ({total_processes} > {len(houses)}) when `allow_oversample` is `False`." 
- ) - - if total_processes % len(houses) != 0: - get_logger().warning(oversample_warning) - valid_house_inds = valid_house_inds * ceil(total_processes / len(houses)) - valid_house_inds = valid_house_inds[ - : total_processes * (len(valid_house_inds) // total_processes) - ] - elif len(houses) % total_processes != 0: - get_logger().warning(oversample_warning) - inds = self._partition_inds(len(valid_house_inds), total_processes) - selected_house_inds = valid_house_inds[inds[process_ind] : inds[process_ind + 1]] + house_inds = list(range(len(houses))) + scenes = [str(h) for h in house_inds if h not in PROCTHOR_INVALID_SCENES] + general_args = super()._get_sampler_args_for_scene_split(scenes=scenes, # scenes=scenes, + process_ind=process_ind, + **kwargs) x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None) - return { + procthor_specific = { "sampler_mode": mode, "houses": houses, - "house_inds": selected_house_inds, + "house_inds": list(map(int,general_args['scenes'])), "process_ind": process_ind, - "env_args": self.ENV_ARGS, - "max_steps": self.MAX_STEPS, - "sensors": self.SENSORS, - "seed": seeds[process_ind] if seeds is not None else None, - "deterministic_cudnn": deterministic_cudnn, - "rewards_config": self.REWARD_CONFIG, "target_object_types": self.OBJECT_TYPES, "max_tasks": max_tasks if max_tasks is not None else len(house_inds), "distance_type": self.DISTANCE_TYPE, "resample_same_scene_freq": resample_same_scene_freq, "scene_name": "Procedural" + } + del general_args['scenes'] + out = {**general_args,**procthor_specific} + + out["task_type"] = self.TASK_TYPE + out["cap_training"] = self.CAP_TRAINING + + out["env_args"]["x_display"] = x_display + out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID + out["env_args"]['scene'] = 'Procedural' + if allow_flipping is not None: + out["env_args"]['allow_flipping'] = allow_flipping + + return out - }, x_display def train_task_sampler_args( self, @@ -179,61 +166,36 @@ def train_task_sampler_args( if self.NUM_TRAIN_HOUSES: train_houses = train_houses.select(range(self.NUM_TRAIN_HOUSES)) - out,x_display = self._get_sampler_args_for_scene_split( + return self._get_sampler_args_for_scene_split( houses=train_houses, mode="train", - allow_oversample=True, max_tasks=float("inf"), resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN, **kwargs, ) - out["cap_training"] = self.CAP_TRAINING - out["env_args"] = {} - out["env_args"].update(self.ENV_ARGS) - out["env_args"]["x_display"] = x_display - out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID - out["env_args"]['scene'] = 'Procedural' - return out def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: val_houses = self.HOUSE_DATASET["validation"] - out,x_display = self._get_sampler_args_for_scene_split( + return self._get_sampler_args_for_scene_split( houses=val_houses.select(range(100)), mode="eval", - allow_oversample=False, max_tasks=10, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, + allow_flipping=False, **kwargs, ) - out["cap_training"] = self.CAP_TRAINING - out["env_args"] = {} - out["env_args"].update(self.ENV_ARGS) - out["env_args"]['allow_flipping'] = False - out["env_args"]["x_display"] = x_display - out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID - out["env_args"]['scene'] = 'Procedural' - return out def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: if self.TEST_ON_VALIDATION: return self.valid_task_sampler_args(**kwargs) test_houses = self.HOUSE_DATASET["test"] - out,x_display = 
self._get_sampler_args_for_scene_split( + return self._get_sampler_args_for_scene_split( houses=test_houses.select(range(100)), mode="eval", - allow_oversample=False, max_tasks=10, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, + allow_flipping=False, **kwargs, ) - out["cap_training"] = self.CAP_TRAINING - out["env_args"] = {} - out["env_args"].update(self.ENV_ARGS) - out["env_args"]['allow_flipping'] = False - out["env_args"]["x_display"] = x_display - out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID - out["env_args"]['scene'] = 'Procedural' - return out - diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index bec26ac..e1a22f6 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -18,39 +18,33 @@ from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask, ObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS +from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from manipulathor_baselines.procthor_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ + ClipResNetPreprocessNCameraGRUActorCriticMixin from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.experiment_utils import Builder -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import ResnetTensorNavNCameraActorCritic -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import TestMetricLogger - class ProcTHORObjectNavClipResnet50RGBOnly( ProcTHORObjectNavBaseConfig ): - """An Object Navigation experiment configuration in iThor with RGB - input.""" + """Single-camera Object Navigation experiment configuration in ProcTHOR, using CLIP preprocessing.""" with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) SENSORS = [ RGBSensorThor( - height=224, - width=224, + height=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, + width=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, use_resnet_normalization=True, - mean=mean, - stdev=stdev, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres", ), GoalObjectTypeThorSensor( @@ -61,15 +55,26 @@ class ProcTHORObjectNavClipResnet50RGBOnly( MAX_STEPS = 500 if platform.system() == "Darwin": MAX_STEPS = 100 + NUM_TRAIN_HOUSES = 100 + SENSORS += [ + RGBSensorThor( + height=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, + 
width=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_only_viz", + ), + ] + + NUM_PROCESSES = 40 TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask + TASK_TYPE = ObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] + CLIP_MODEL_TYPE = "RN50" - NUM_PROCESSES = 40 - # NUM_TRAIN_HOUSES = 50 def __init__(self): super().__init__() @@ -82,42 +87,20 @@ def __init__(self): self.ENV_ARGS['renderDepthImage'] = False self.ENV_ARGS['allow_flipping'] = False - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - # rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid="rgb_lowres", - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) + self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( + sensors=self.SENSORS, + clip_model_type=self.CLIP_MODEL_TYPE, + screen_size=self.SCREEN_SIZE, ) - return preprocessors + def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return self.preprocessing_and_model.preprocessors() - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - resnet_preprocessor_uuids = ["rgb_clip_resnet"] - - return ResnetTensorNavNCameraActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=resnet_preprocessor_uuids, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, + + def create_model(self, **kwargs) -> nn.Module: + return self.preprocessing_and_model.create_model( + num_actions=len(self.TASK_TYPE.class_action_names()), **kwargs, + visualize=self.VISUALIZE ) @classmethod diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py deleted file mode 100644 index 9a0c637..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_rgb_only.py +++ /dev/null @@ -1,113 +0,0 @@ -import platform -import random -from typing import Sequence, Union - -import gym -import numpy as np -from torch import nn -import yaml - -# from ithor_arm.bring_object_sensors import RGBSensorThorNoNan -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from manipulathor_baselines.procthor_baselines.models.objnav_model_rgb_only import ObjNavOnlyRGBModel -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import ProcTHORObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID -# 
from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS -from manipulathor_utils.debugger_util import ForkedPdb - -# from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -# from allenact.base_abstractions.preprocessor import Preprocessor -# from allenact.utils.experiment_utils import Builder -# from allenact_plugins.navigation_plugin.objectnav.models import ( -# ResnetTensorNavActorCritic, -# ) - - - -class ProcTHORObjectNavRGBOnly( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: - OBJECT_TYPES=yaml.safe_load(f) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 10 - - TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = ProcTHORObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - - NUM_PROCESSES = 56 - - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train - if platform.system() == "Darwin": - TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - - - # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list])) - - random.shuffle(TRAIN_SCENES) - - if platform.system() == "Darwin": - MAX_STEPS = 10 - - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - - @classmethod - def create_model(cls, **kwargs) -> nn.Module: - return ObjNavOnlyRGBModel( - action_space=gym.spaces.Discrete( - len(cls.TASK_TYPE.class_action_names()) - ), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - hidden_size=512, - visualize=cls.VISUALIZE - ) - - @classmethod - def tag(cls): - return cls.__name__ diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py deleted file mode 100644 index 46f4626..0000000 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_robothor_rgb_only.py +++ /dev/null @@ -1,153 +0,0 @@ -import platform -import random -from typing import Sequence, Union -from typing_extensions import final - -import gym -import numpy as np -from torch import nn -import copy - -from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor - - -from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment - -from 
manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS -from manipulathor_utils.debugger_util import ForkedPdb - -from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.experiment_utils import Builder -from allenact_plugins.navigation_plugin.objectnav.models import ResnetTensorNavActorCritic - -from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL -from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer -from ithor_arm.ithor_arm_viz import BringObjImageVisualizer, TestMetricLogger - - -class RoboTHORObjectNavClipResnet50RGBOnly( - ProcTHORObjectNavBaseConfig -): - """An Object Navigation experiment configuration in iThor with RGB - input.""" - - - TRAIN_SCENES = ROBOTHOR_TRAIN - TEST_SCENES = ROBOTHOR_VAL - OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor'])) - OBJECT_TYPES.sort() - OBJECT_TYPES = ['AlarmClock', 'Apple', 'BaseballBat', 'BasketBall', 'Bed', 'Bowl', 'Chair', - 'GarbageCan', 'HousePlant', 'Laptop', 'Mug', 'Sofa', 'SprayBottle', 'Television', - 'Toilet', 'Vase'] # habitat challenge object - just get it to eval bc trained with goal sensor. - # ForkedPdb().set_trace() - - random.shuffle(TRAIN_SCENES) - random.shuffle(TEST_SCENES) - - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) - SENSORS = [ - RGBSensorThor( - height=224, - width=224, - use_resnet_normalization=True, - mean=mean, - stdev=stdev, - uuid="rgb_lowres", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] - - MAX_STEPS = 500 - if platform.system() == "Darwin": - MAX_STEPS = 50 - - TASK_SAMPLER = AllRoomsObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment - - POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] - - NUM_PROCESSES = 56 - - # TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(9999)] # 9999 is all of train - # if platform.system() == "Darwin": - # TRAIN_SCENES = [f'ProcTHOR{i}' for i in range(100)] - - # TEST_SCENES = [f'ProcTHOR{i}' for i in range(1)] - # random.shuffle(TRAIN_SCENES) - - def __init__(self): - super().__init__() - self.REWARD_CONFIG['goal_success_reward'] = 10.0 - self.REWARD_CONFIG['step_penalty'] = -0.01 - self.REWARD_CONFIG['failed_stop_reward'] = 0.0 - self.REWARD_CONFIG['reached_horizon_reward'] = 0.0 - self.REWARD_CONFIG['shaping_weight'] = 1.0 - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr - self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - # self.ENV_ARGS['scene'] = 'Procedural' - self.ENV_ARGS['renderInstanceSegmentation'] = 'False' - self.ENV_ARGS['commit_id'] = PROCTHOR_COMMIT_ID - self.ENV_ARGS['allow_flipping'] = True - - - @classmethod - @final - def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: - preprocessors = [] - 
rgb_sensor = next((s for s in cls.SENSORS if isinstance(s, RGBSensorThor)), None) - - if rgb_sensor is not None: - preprocessors.append( - ClipResNetPreprocessor( - rgb_input_uuid=rgb_sensor.uuid, - clip_model_type="RN50", - pool=False, - output_uuid="rgb_clip_resnet", - visualize=cls.VISUALIZE - ) - ) - - return preprocessors - - - @classmethod - @final - def create_model(cls, **kwargs) -> nn.Module: - has_rgb = any(isinstance(s, RGBSensorThor) for s in cls.SENSORS) - # has_depth = any(isinstance(s, DepthSensor) for s in cls.SENSORS) - has_depth=False - - goal_sensor_uuid = next( - (s.uuid for s in cls.SENSORS if isinstance(s, GoalObjectTypeThorSensor)), - None, - ) - - return ResnetTensorNavActorCritic( - action_space=gym.spaces.Discrete(len(cls.TASK_TYPE.class_action_names())), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - rgb_resnet_preprocessor_uuid="rgb_clip_resnet" if has_rgb else None, - depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - - @classmethod - def tag(cls): - return cls.__name__ - - diff --git a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py b/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py index c905e57..178cb69 100644 --- a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py +++ b/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py @@ -44,6 +44,7 @@ def __init__( goal_dims: int = 32, resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32), combiner_hidden_out_dims: Tuple[int, int] = (128, 32), + visualize=False, ): super().__init__( action_space=action_space, @@ -53,6 +54,7 @@ def __init__( beliefs_fusion=beliefs_fusion, auxiliary_uuids=auxiliary_uuids, ) + self.visualize = visualize self.goal_visual_encoder = ResnetNCameraTensorGoalEncoder( # type:ignore self.observation_space, @@ -97,7 +99,7 @@ def forward(self, **kwargs): #TODO NOW remove return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) def forward(self, **kwargs): #TODO NOW remove - if platform.system() == "Darwin": + if self.visualize: hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) diff --git a/manipulathor_baselines/procthor_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/procthor_baselines/models/clip_resnet_ncamera_preprocess_mixin.py new file mode 100644 index 0000000..b66d014 --- /dev/null +++ b/manipulathor_baselines/procthor_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -0,0 +1,81 @@ +from typing import Sequence, Union, Type + +import attr +import gym +import numpy as np +import torch.nn as nn + +from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.base_abstractions.sensor import Sensor +from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor +from allenact.utils.experiment_utils import Builder +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import \ + ResnetTensorNavNCameraActorCritic +from manipulathor_utils.debugger_util import ForkedPdb + + +@attr.s(kw_only=True) +class 
ClipResNetPreprocessNCameraGRUActorCriticMixin: + sensors: Sequence[Sensor] = attr.ib() + clip_model_type: str = attr.ib() + screen_size: int = attr.ib() + pool: bool = attr.ib(default=False) + + def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + preprocessors = [] + self.resnet_preprocessor_uuids = [] + for camera in [s for s in self.sensors if (isinstance(s, RGBSensor) or isinstance(s,DepthSensor))]: + if "_only_viz" not in camera.uuid: + if isinstance(camera, RGBSensor): + assert ( + np.linalg.norm( + np.array(camera._norm_means) + - np.array(ClipResNetPreprocessor.CLIP_RGB_MEANS) + ) + < 1e-5 + ) + assert ( + np.linalg.norm( + np.array(camera._norm_sds) + - np.array(ClipResNetPreprocessor.CLIP_RGB_STDS) + ) + < 1e-5 + ) + preprocessors.append( + ClipResNetPreprocessor( + rgb_input_uuid=camera.uuid, + clip_model_type=self.clip_model_type, + pool=self.pool, + output_uuid=camera.uuid+"_clip_resnet", + ) + ) + self.resnet_preprocessor_uuids.append(camera.uuid+"_clip_resnet") + + else: + self.resnet_preprocessor_uuids.append(camera.uuid) + + return preprocessors + + def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module: + goal_sensor_uuid = next( + (s.uuid for s in self.sensors if isinstance(s, GoalObjectTypeThorSensor)), + None, + ) + self.resnet_preprocessor_uuids = [s for s in self.resnet_preprocessor_uuids if "_only_viz" not in s] + + # display or assert sensor order here? possible source for sneaky failure if they're not the same + # as in pretraining when loading weights. + + return ResnetTensorNavNCameraActorCritic( + action_space=gym.spaces.Discrete(num_actions), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + goal_sensor_uuid=goal_sensor_uuid, + resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + hidden_size=512, + goal_dims=32, + add_prev_actions=True, + visualize=visualize + ) \ No newline at end of file diff --git a/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py b/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py deleted file mode 100644 index 91ef13c..0000000 --- a/manipulathor_baselines/procthor_baselines/models/objnav_model_rgb_only.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Baseline models for use in the object navigation task. - -Object navigation is currently available as a Task in AI2-THOR and -Facebook's Habitat. -""" -import platform -from datetime import datetime -from typing import Tuple, Optional - -import gym -import torch -from allenact.algorithms.onpolicy_sync.policy import ( - ActorCriticModel, - LinearCriticHead, - DistributionType, - Memory, - ObservationType, -) -from allenact.base_abstractions.distributions import CategoricalDistr -from allenact.base_abstractions.misc import ActorCriticOutput -from allenact.embodiedai.models.basic_models import RNNStateEncoder -from allenact.utils.model_utils import make_cnn, compute_cnn_output -from gym.spaces.dict import Dict as SpaceDict -from torch import nn - -from manipulathor_utils.debugger_util import ForkedPdb -from utils.model_utils import LinearActorHeadNoCategory -from utils.hacky_viz_utils import hacky_visualization - - -class ObjNavOnlyRGBModel(ActorCriticModel[CategoricalDistr]): - """Baseline recurrent actor critic model for preddistancenav task. - - # Attributes - action_space : The space of actions available to the agent. Currently only discrete - actions are allowed (so this space will always be of type `gym.spaces.Discrete`). 
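(Editorial aside, not part of the patch.) The new mixin above builds one CLIP-ResNet preprocessor per RGB camera and keys its output as `<camera_uuid>_clip_resnet`, while sensors whose uuid contains `_only_viz` are kept for visualization only. A minimal, framework-free sketch of that naming rule, using made-up sensor uuids:

def clip_resnet_output_uuids(sensor_uuids):
    # Mirror the uuid convention: every non-visualization camera gets a
    # matching "<uuid>_clip_resnet" preprocessor output.
    outputs = []
    for uuid in sensor_uuids:
        if "_only_viz" in uuid:
            continue  # visualization-only cameras feed no preprocessor
        outputs.append(uuid + "_clip_resnet")
    return outputs


assert clip_resnet_output_uuids(
    ["rgb_lowres", "rgb_lowres_arm", "rgb_lowres_only_viz"]
) == ["rgb_lowres_clip_resnet", "rgb_lowres_arm_clip_resnet"]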
- observation_space : The observation space expected by the agent. This observation space - should include (optionally) 'rgb' images and 'depth' images. - hidden_size : The hidden size of the GRU RNN. - object_type_embedding_dim: The dimensionality of the embedding corresponding to the goal - object type. - """ - - def __init__( - self, - action_space: gym.spaces.Discrete, - observation_space: SpaceDict, - hidden_size=512, - obj_state_embedding_size=512, - trainable_masked_hidden_state: bool = False, - num_rnn_layers=1, - rnn_type="GRU", - teacher_forcing=1, - visualize=False, - ): - """Initializer. - - See class documentation for parameter definitions. - """ - super().__init__(action_space=action_space, observation_space=observation_space) - self.visualize = visualize - - self._hidden_size = hidden_size - self.object_type_embedding_size = obj_state_embedding_size - - # sensor_names = self.observation_space.spaces.keys() - network_args = {'input_channels': 3, 'layer_channels': [32, 64, 32], 'kernel_sizes': [(8, 8), (4, 4), (3, 3)], 'strides': [(4, 4), (2, 2), (1, 1)], 'paddings': [(0, 0), (0, 0), (0, 0)], 'dilations': [(1, 1), (1, 1), (1, 1)], 'output_height': 24, 'output_width': 24, 'output_channels': 512, 'flatten': True, 'output_relu': True} - self.full_visual_encoder = make_cnn(**network_args) - - self.state_encoder = RNNStateEncoder( - 512, - self._hidden_size, - trainable_masked_hidden_state=trainable_masked_hidden_state, - num_layers=num_rnn_layers, - rnn_type=rnn_type, - ) - - self.actor_pickup = LinearActorHeadNoCategory(self._hidden_size, action_space.n) - self.critic_pickup = LinearCriticHead(self._hidden_size) - - self.train() - # self.detection_model.eval() - - self.starting_time = datetime.now().strftime("{}_%m_%d_%Y_%H_%M_%S_%f".format(self.__class__.__name__)) - - - - @property - def recurrent_hidden_state_size(self) -> int: - """The recurrent hidden state size of the model.""" - return self._hidden_size - - @property - def num_recurrent_layers(self) -> int: - """Number of recurrent hidden layers.""" - return self.state_encoder.num_recurrent_layers - - def _recurrent_memory_specification(self): - return dict( - rnn=( - ( - ("layer", self.num_recurrent_layers), - ("sampler", None), - ("hidden", self.recurrent_hidden_state_size), - ), - torch.float32, - ) - ) - - def forward( # type:ignore - self, - observations: ObservationType, - memory: Memory, - prev_actions: torch.Tensor, - masks: torch.FloatTensor, - ) -> Tuple[ActorCriticOutput[DistributionType], Optional[Memory]]: - """Processes input batched observations to produce new actor and critic - values. Processes input batched observations (along with prior hidden - states, previous actions, and masks denoting which recurrent hidden - states should be masked) and returns an `ActorCriticOutput` object - containing the model's policy (distribution over actions) and - evaluation of the current state (value). - - # Parameters - observations : Batched input observations. - memory : `Memory` containing the hidden states from initial timepoints. - prev_actions : Tensor of previous actions taken. - masks : Masks applied to hidden states. See `RNNStateEncoder`. - # Returns - Tuple of the `ActorCriticOutput` and recurrent hidden state. 
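(Editorial aside, not part of the patch.) The model being deleted here follows the standard recurrent actor-critic pattern: a small CNN encodes the RGB frame, a GRU carries state across steps, and linear actor/critic heads read the GRU output. A self-contained PyTorch sketch of that pipeline; dimensions and names are illustrative, not the repository's:

import torch
import torch.nn as nn


class TinyRecurrentActorCritic(nn.Module):
    """Minimal CNN -> GRU -> actor/critic pipeline for a single RGB observation."""

    def __init__(self, num_actions: int, hidden_size: int = 128):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=4, stride=2), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1), nn.Flatten(),
        )
        self.rnn = nn.GRU(input_size=32, hidden_size=hidden_size, batch_first=True)
        self.actor = nn.Linear(hidden_size, num_actions)
        self.critic = nn.Linear(hidden_size, 1)

    def forward(self, rgb, hidden=None):
        # rgb: [batch, 3, H, W]; hidden: [1, batch, hidden_size] or None.
        feats = self.encoder(rgb).unsqueeze(1)   # [batch, 1, 32], one timestep
        out, hidden = self.rnn(feats, hidden)    # recurrent state carried across calls
        out = out.squeeze(1)
        return self.actor(out), self.critic(out), hidden


model = TinyRecurrentActorCritic(num_actions=6)
logits, value, h = model(torch.zeros(2, 3, 224, 224))
assert logits.shape == (2, 6) and value.shape == (2, 1)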
- """ - - #we really need to switch to resnet now that visual features are actually important - - # pickup_bool = observations["pickedup_object"] - # after_pickup = pickup_bool == 1 - - visual_observation = observations['rgb_lowres'] - - visual_observation_encoding = compute_cnn_output(self.full_visual_encoder, visual_observation) - - - # arm_distance_to_obj_source = observations['arm_point_nav_source'] - # arm_distance_to_obj_destination = observations['arm_point_nav_destination'] - - # arm_distance_to_obj_source_embedding = self.pointnav_embedding(arm_distance_to_obj_source) - # arm_distance_to_obj_destination_embedding = self.pointnav_embedding(arm_distance_to_obj_destination) - # pointnav_embedding = arm_distance_to_obj_source_embedding - # pointnav_embedding[after_pickup] = arm_distance_to_obj_destination_embedding[after_pickup] - - #TODO remove as soon as the bug is resolved - # assert not torch.any(torch.isinf(pointnav_embedding) + torch.isnan(pointnav_embedding)), 'pointnav_embedding is nan' - assert not torch.any(torch.isinf(visual_observation) + torch.isnan(visual_observation)), 'visual_observation is nan' - - # arm_distance_to_obj = arm_distance_to_obj_source - # arm_distance_to_obj[after_pickup] = arm_distance_to_obj_destination[after_pickup] - # pointnav_embedding = self.pointnav_embedding(arm_distance_to_obj) - - visual_observation_encoding = torch.cat([visual_observation_encoding], dim=-1) - - - x_out, rnn_hidden_states = self.state_encoder( - visual_observation_encoding, memory.tensor("rnn"), masks - ) - - - # I think we need two model one for pick up and one for drop off - - actor_out_pickup = self.actor_pickup(x_out) - critic_out_pickup = self.critic_pickup(x_out) - - - actor_out_final = actor_out_pickup - critic_out_final = critic_out_pickup - - actor_out = CategoricalDistr(logits=actor_out_final) - - actor_critic_output = ActorCriticOutput( - distributions=actor_out, values=critic_out_final, extras={} - ) - - memory = memory.set_tensor("rnn", rnn_hidden_states) - - #TODO remove as soon as bug is resolved - actor_is_nan = torch.isinf(actor_out_final) + torch.isnan(actor_out_final) - if torch.any(actor_is_nan): - print('actor is nan', actor_is_nan.sum()) - print('scene number', observations['scene_number']) - - # TODO really bad design - if self.visualize: - hacky_visualization(observations, base_directory_to_right_images=self.starting_time) - - - return ( - actor_critic_output, - memory, - ) - From 7cc20d2dae3a3d4db382c437337d3d8ca9096034 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 25 May 2022 13:30:20 -0700 Subject: [PATCH 19/53] complete refactor 6 - n-camera model and preprocessors to mixin, proprocessor from allenact, better leverage class inheritance for procthor sampler args, trim down configs with inheritance, delete obselete files --- .../models/clip_preprocessors.py | 183 ------------------ 1 file changed, 183 deletions(-) delete mode 100644 manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py diff --git a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py b/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py deleted file mode 100644 index b676ab3..0000000 --- a/manipulathor_baselines/procthor_baselines/models/clip_preprocessors.py +++ /dev/null @@ -1,183 +0,0 @@ -from typing import Any, Dict, List, Optional, cast -from typing_extensions import Literal - -import clip -import gym -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from 
allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.misc_utils import prepare_locals_for_super -from manipulathor_utils.debugger_util import ForkedPdb -# from utils.hacky_viz_utils import hacky_visualization -from datetime import datetime - - -class ClipResNetEmbedder(nn.Module): - def __init__(self, resnet, pool=True): - super().__init__() - self.model = resnet - self.pool = pool - self.eval() - - def forward(self, x): - m = self.model.visual - with torch.no_grad(): - - def stem(x): - for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: - x = m.relu(bn(conv(x))) - x = m.avgpool(x) - return x - - x = x.type(m.conv1.weight.dtype) - x = stem(x) - x = m.layer1(x) - x = m.layer2(x) - x = m.layer3(x) - x = m.layer4(x) - if self.pool: - x = F.adaptive_avg_pool2d(x, (1, 1)) - x = torch.flatten(x, 1) - return x - -class ClipResNetPreprocessor(Preprocessor): - """Preprocess RGB or depth image using a ResNet model with CLIP model - weights.""" - - def __init__( - self, - rgb_input_uuid: str, - clip_model_type: Literal["RN50", "RN50x16"], - pool: bool, - device: Optional[torch.device] = None, - device_ids: Optional[List[torch.device]] = None, - visualize=False, # TODO this is such a hack job - **kwargs: Any, - ): - assert clip_model_type in clip.available_models() - - if clip_model_type == "RN50": - output_shape = (2048, 7, 7) - elif clip_model_type == "RN50x16": - output_shape = (3072, 7, 7) - else: - raise NotImplementedError( - f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" - ) - - if pool: - output_shape = output_shape[:1] - - self.clip_model_type = clip_model_type - - self.pool = pool - - self.device = torch.device("cpu") if device is None else device - self.device_ids = device_ids or cast( - List[torch.device], list(range(torch.cuda.device_count())) - ) - self._resnet: Optional[ClipResNetEmbedder] = None - - low = -np.inf - high = np.inf - shape = output_shape - - input_uuids = [rgb_input_uuid] - assert ( - len(input_uuids) == 1 - ), "resnet preprocessor can only consume one observation type" - - observation_space = gym.spaces.Box(low=low, high=high, shape=shape) - self.starting_time = datetime.now().strftime("{}_%m_%d_%Y_%H_%M_%S_%f".format(self.__class__.__name__)) - self.visualize = visualize - - super().__init__(**prepare_locals_for_super(locals())) - - @property - def resnet(self) -> ClipResNetEmbedder: - import clip - - if self._resnet is None: - self._resnet = ClipResNetEmbedder( - clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool - ).to(self.device) - return self._resnet - - def to(self, device: torch.device) -> "ClipResNetPreprocessor": - self._resnet = self.resnet.to(device) - self.device = device - return self - - def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: - x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw - # if self.visualize: - # obs['rgb_lowres'] = x.permute(0,2,3,1).unsqueeze(0) - # hacky_visualization(obs, base_directory_to_right_images=self.starting_time) - # If the input is depth, repeat it across all 3 channels - if x.shape[1] == 1: - x = x.repeat(1, 3, 1, 1) - x = self.resnet(x).float() - return x - - -class ClipTextPreprocessor(Preprocessor): - def __init__( - self, - goal_sensor_uuid: str, - object_types: List[str], - device: Optional[torch.device] = None, - device_ids: Optional[List[torch.device]] = None, - **kwargs: Any, - ): - try: - import clip - - self.clip = clip - except ImportError as _: - raise ImportError( - 
"Cannot `import clip` when instatiating `CLIPResNetPreprocessor`." - " Please install clip from the openai/CLIP git repository:" - "\n`pip install git+https://github.com/openai/CLIP.git@3b473b0e682c091a9e53623eebc1ca1657385717`" - ) - - output_shape = (1024,) - - self.object_types = object_types - - self.device = torch.device("cpu") if device is None else device - self.device_ids = device_ids or cast( - List[torch.device], list(range(torch.cuda.device_count())) - ) - - low = -np.inf - high = np.inf - shape = output_shape - - observation_space = gym.spaces.Box(low=low, high=high, shape=shape) - - input_uuids = [goal_sensor_uuid] - - super().__init__(**prepare_locals_for_super(locals())) - - @property - def text_encoder(self): - if self._clip_model is None: - self._clip_model = self.clip.load("RN50", device=self.device)[0] - self._clip_model.eval() - return self._clip_model.encode_text - - def to(self, device: torch.device): - self.device = device - self._clip_model = None - return self - - def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: - object_inds = obs[self.input_uuids[0]] - object_types = [self.object_types[ind] for ind in object_inds] - x = self.clip.tokenize([f"navigate to the {obj}" for obj in object_types]).to( - self.device - ) - with torch.no_grad(): - return self.text_encoder(x).float() From 13b40f8cd94059dbb759f3356b5a67e5b38f83a1 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 26 May 2022 09:27:05 -0700 Subject: [PATCH 20/53] update training pipeline for distributed nodes --- .../experiments/ithor/obj_nav_base_config.py | 11 ++++++----- .../experiments/ithor/obj_nav_for_procthor.py | 16 ---------------- ...bj_nav_for_procthor_clip_resnet50_rgb_only.py | 2 +- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py index ff32853..cc5a05d 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py @@ -46,8 +46,8 @@ class ObjectNavBaseConfig(BringObjectThorBaseConfig): if torch.cuda.is_available() else (torch.device("cpu"),) ) + distributed_nodes = 1 - SENSORS: Sequence[Sensor] = [] POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] @@ -63,20 +63,21 @@ def __init__(self): "shaping_weight": 1.0, "positive_only_reward": False, } + def training_pipeline(self, **kwargs): log_interval_small = ( - self.NUM_PROCESSES * 32 * 10 + self.distributed_nodes*self.NUM_PROCESSES * 32 * 10 if torch.cuda.is_available else 1 ) log_interval_medium = ( - self.NUM_PROCESSES * 64 * 5 + self.distributed_nodes*self.NUM_PROCESSES * 64 * 5 if torch.cuda.is_available else 1 ) log_interval_large = ( - self.NUM_PROCESSES * 128 * 5 + self.distributed_nodes*self.NUM_PROCESSES * 128 * 5 if torch.cuda.is_available else 1 ) @@ -86,7 +87,7 @@ def training_pipeline(self, **kwargs): batch_steps_2 = int(1e9) - batch_steps_1 - batch_steps_0 return TrainingPipeline( - save_interval=5_000_000, + save_interval=2_000_000, metric_accumulate_interval=10_000, optimizer_builder=Builder(optim.Adam, dict(lr=0.0003)), num_mini_batch=1, diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py index bb36420..ae141ea 100644 --- 
a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py @@ -41,22 +41,6 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): OBJECT_TYPES: Optional[Sequence[str]] - TRAIN_DEVICES = ( - tuple(range(torch.cuda.device_count())) - if torch.cuda.is_available() - else (torch.device("cpu"),) - ) - VAL_DEVICES = ( - (torch.cuda.device_count() - 1,) - if torch.cuda.is_available() - else (torch.device("cpu"),) - ) - TEST_DEVICES = ( - tuple(range(torch.cuda.device_count())) - if torch.cuda.is_available() - else (torch.device("cpu"),) - ) - SENSORS: Sequence[Sensor] = [] DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index e1a22f6..c1f593a 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -70,7 +70,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 40 TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = ObjectNavTask + TASK_TYPE = StretchObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment CLIP_MODEL_TYPE = "RN50" From db88cc857bdd144aa760cc53cc8ed364624329bc Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 26 May 2022 11:28:58 -0700 Subject: [PATCH 21/53] reshuffle - new home for all objectnav --- .../experiments/ithor/obj_nav_2camera_ithor_wide.py | 4 ++-- .../experiments/ithor/obj_nav_2camera_robothor_narrow.py | 2 +- .../experiments}/obj_nav_base_config.py | 0 .../experiments/procthor}/obj_nav_2camera_procthor_narrow.py | 3 ++- .../experiments/procthor}/obj_nav_2camera_procthor_wide.py | 3 ++- .../procthor}/obj_nav_2camera_procthor_wide_distrib.py | 2 +- .../experiments/procthor}/obj_nav_for_procthor.py | 2 +- .../procthor}/obj_nav_for_procthor_clip_resnet50_rgb_only.py | 4 ++-- .../models/clip_objnav_ncamera_model.py | 0 .../models/clip_resnet_ncamera_preprocess_mixin.py | 2 +- 10 files changed, 12 insertions(+), 10 deletions(-) rename manipulathor_baselines/{procthor_baselines => object_nav_baselines}/experiments/ithor/obj_nav_2camera_ithor_wide.py (96%) rename manipulathor_baselines/{procthor_baselines => object_nav_baselines}/experiments/ithor/obj_nav_2camera_robothor_narrow.py (96%) rename manipulathor_baselines/{procthor_baselines/experiments/ithor => object_nav_baselines/experiments}/obj_nav_base_config.py (100%) rename manipulathor_baselines/{procthor_baselines/experiments/ithor => object_nav_baselines/experiments/procthor}/obj_nav_2camera_procthor_narrow.py (93%) rename manipulathor_baselines/{procthor_baselines/experiments/ithor => object_nav_baselines/experiments/procthor}/obj_nav_2camera_procthor_wide.py (94%) rename manipulathor_baselines/{procthor_baselines/experiments/ithor => object_nav_baselines/experiments/procthor}/obj_nav_2camera_procthor_wide_distrib.py (94%) rename manipulathor_baselines/{procthor_baselines/experiments/ithor => object_nav_baselines/experiments/procthor}/obj_nav_for_procthor.py (98%) rename manipulathor_baselines/{procthor_baselines/experiments/ithor => object_nav_baselines/experiments/procthor}/obj_nav_for_procthor_clip_resnet50_rgb_only.py (94%) rename 
manipulathor_baselines/{procthor_baselines => object_nav_baselines}/models/clip_objnav_ncamera_model.py (100%) rename manipulathor_baselines/{procthor_baselines => object_nav_baselines}/models/clip_resnet_ncamera_preprocess_mixin.py (97%) diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py similarity index 96% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py rename to manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index 75b70ef..7f45d3d 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -16,14 +16,14 @@ from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_base_config import ObjectNavBaseConfig +from manipulathor_baselines.object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask from utils.stretch_utils.stretch_constants import STRETCH_MANIPULATHOR_COMMIT_ID, STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb # from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ +from manipulathor_baselines.object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ ClipResNetPreprocessNCameraGRUActorCriticMixin from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact.base_abstractions.preprocessor import Preprocessor diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py similarity index 96% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py rename to manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py index 31513cd..c213a0c 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py +++ b/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py @@ -5,7 +5,7 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ +from manipulathor_baselines.object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ ithorObjectNavClipResnet50RGBOnly2CameraWideFOV from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py b/manipulathor_baselines/object_nav_baselines/experiments/obj_nav_base_config.py similarity index 100% rename from 
manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_base_config.py rename to manipulathor_baselines/object_nav_baselines/experiments/obj_nav_base_config.py diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py similarity index 93% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py rename to manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py index 57cb9e6..3515c74 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_narrow.py +++ b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py @@ -4,7 +4,8 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor_clip_resnet50_rgb_only import ProcTHORObjectNavClipResnet50RGBOnly +from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ + import ProcTHORObjectNavClipResnet50RGBOnly from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py similarity index 94% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py rename to manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index 6cb797b..f5f1160 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -7,7 +7,8 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor_clip_resnet50_rgb_only import ProcTHORObjectNavClipResnet50RGBOnly +from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ + import ProcTHORObjectNavClipResnet50RGBOnly from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask from manipulathor_utils.debugger_util import ForkedPdb diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py similarity index 94% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py rename to manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py index 2dfc866..f26460c 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib.py +++ b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py @@ -1,6 +1,6 @@ import torch -from 
manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_2camera_procthor_wide import \ +from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py similarity index 98% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py rename to manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index ae141ea..e28097c 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -23,7 +23,7 @@ Builder, ) -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_base_config import ObjectNavBaseConfig +from manipulathor_baselines.object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES diff --git a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py similarity index 94% rename from manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py rename to manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index c1f593a..88505e6 100644 --- a/manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -15,14 +15,14 @@ from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from manipulathor_baselines.procthor_baselines.experiments.ithor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig +from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask, ObjectNavTask from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.procthor_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ +from manipulathor_baselines.object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ ClipResNetPreprocessNCameraGRUActorCriticMixin from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.experiment_utils import Builder diff --git a/manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py b/manipulathor_baselines/object_nav_baselines/models/clip_objnav_ncamera_model.py similarity index 100% rename from manipulathor_baselines/procthor_baselines/models/clip_objnav_ncamera_model.py rename to 
manipulathor_baselines/object_nav_baselines/models/clip_objnav_ncamera_model.py diff --git a/manipulathor_baselines/procthor_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py similarity index 97% rename from manipulathor_baselines/procthor_baselines/models/clip_resnet_ncamera_preprocess_mixin.py rename to manipulathor_baselines/object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py index b66d014..f2be0b7 100644 --- a/manipulathor_baselines/procthor_baselines/models/clip_resnet_ncamera_preprocess_mixin.py +++ b/manipulathor_baselines/object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -12,7 +12,7 @@ from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.procthor_baselines.models.clip_objnav_ncamera_model import \ +from manipulathor_baselines.object_nav_baselines.models.clip_objnav_ncamera_model import \ ResnetTensorNavNCameraActorCritic from manipulathor_utils.debugger_util import ForkedPdb From 1296aa3a31eda9a41b574d6ae4653b1292f26d72 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 26 May 2022 12:54:00 -0700 Subject: [PATCH 22/53] start real robothor --- .../real_robothor/real_robothor_objectnav.py | 46 +++++++++++++++++++ utils/stretch_utils/real_stretch_sensors.py | 2 +- 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 manipulathor_baselines/object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py diff --git a/manipulathor_baselines/object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py b/manipulathor_baselines/object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py new file mode 100644 index 0000000..681ba65 --- /dev/null +++ b/manipulathor_baselines/object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py @@ -0,0 +1,46 @@ +from utils.stretch_utils.stretch_constants import INTEL_CAMERA_WIDTH +from manipulathor_utils.debugger_util import ForkedPdb + +from utils.stretch_utils.real_stretch_sensors import RealRGBSensorStretchIntel, RealRGBSensorStretchKinect +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler +from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask + + +from manipulathor_baselines.object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV + + +class RealStretchObjectNav( + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +): + desired_screen_size = INTEL_CAMERA_WIDTH + SENSORS = [ + RealRGBSensorStretchIntel( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres", + ), + RealRGBSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + uuid="rgb_lowres_arm", + ), + GoalObjectTypeThorSensor( + object_types=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.OBJECT_TYPES, # doesn't matter for now, it's apples all the way dow + ), + ] + + MAX_STEPS = 200 + + TASK_SAMPLER = AllRoomsObjectNavTaskSampler#RealStretchDiverseBringObjectTaskSampler + TASK_TYPE = StretchObjectNavTask#RealStretchExploreWiseRewardTask + + NUM_PROCESSES = 20 + + + TRAIN_SCENES = ['RealRobothor'] + TEST_SCENES = 
['RealRobothor'] diff --git a/utils/stretch_utils/real_stretch_sensors.py b/utils/stretch_utils/real_stretch_sensors.py index 09ad06d..1ca3495 100644 --- a/utils/stretch_utils/real_stretch_sensors.py +++ b/utils/stretch_utils/real_stretch_sensors.py @@ -250,7 +250,7 @@ def get_observation( # TODO remove # mask = np.zeros((224, 224, 1)) - return mask + # return mask img = env.controller.last_event.frame From f54a61fd9fa3cc302a8d9615300a80750c2207e6 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 26 May 2022 15:53:50 -0700 Subject: [PATCH 23/53] rename to reflect stretch only --- .../ithor/obj_nav_2camera_ithor_wide.py | 8 ++++---- .../ithor/obj_nav_2camera_robothor_narrow.py | 2 +- .../experiments/obj_nav_base_config.py | 4 ++-- .../obj_nav_2camera_procthor_narrow.py | 2 +- .../procthor/obj_nav_2camera_procthor_wide.py | 4 ++-- .../obj_nav_2camera_procthor_wide_distrib.py | 2 +- .../procthor/obj_nav_for_procthor.py | 4 ++-- ..._nav_for_procthor_clip_resnet50_rgb_only.py | 6 +++--- .../real_robothor/real_robothor_objectnav.py | 18 +++++++++++++----- .../models/clip_objnav_ncamera_model.py | 0 .../clip_resnet_ncamera_preprocess_mixin.py | 2 +- 11 files changed, 30 insertions(+), 22 deletions(-) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/ithor/obj_nav_2camera_ithor_wide.py (92%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/ithor/obj_nav_2camera_robothor_narrow.py (96%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/obj_nav_base_config.py (96%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/procthor/obj_nav_2camera_procthor_narrow.py (94%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/procthor/obj_nav_2camera_procthor_wide.py (93%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py (94%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/procthor/obj_nav_for_procthor.py (97%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py (91%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/experiments/real_robothor/real_robothor_objectnav.py (56%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/models/clip_objnav_ncamera_model.py (100%) rename manipulathor_baselines/{object_nav_baselines => stretch_object_nav_baselines}/models/clip_resnet_ncamera_preprocess_mixin.py (97%) diff --git a/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py similarity index 92% rename from manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index 7f45d3d..8f6e50d 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -16,14 +16,14 @@ from utils.stretch_utils.stretch_ithor_arm_environment import 
StretchManipulaTHOREnvironment -from manipulathor_baselines.object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask +from manipulathor_baselines.stretch_object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig +from utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask from utils.stretch_utils.stretch_constants import STRETCH_MANIPULATHOR_COMMIT_ID, STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb # from manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor -from manipulathor_baselines.object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ +from manipulathor_baselines.stretch_object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ ClipResNetPreprocessNCameraGRUActorCriticMixin from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact.base_abstractions.preprocessor import Preprocessor diff --git a/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py similarity index 96% rename from manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py index c213a0c..41c5f00 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py @@ -5,7 +5,7 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ +from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ ithorObjectNavClipResnet50RGBOnly2CameraWideFOV from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor diff --git a/manipulathor_baselines/object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py similarity index 96% rename from manipulathor_baselines/object_nav_baselines/experiments/obj_nav_base_config.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py index cc5a05d..6a1bf9b 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/obj_nav_base_config.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -20,8 +20,8 @@ from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer from ithor_arm.ithor_arm_viz import TestMetricLogger -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask +from utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler +from 
utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID diff --git a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py similarity index 94% rename from manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py index 3515c74..4a92490 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py @@ -4,7 +4,7 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ import ProcTHORObjectNavClipResnet50RGBOnly from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor diff --git a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py similarity index 93% rename from manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index f5f1160..e9eb252 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -7,9 +7,9 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ import ProcTHORObjectNavClipResnet50RGBOnly -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask from manipulathor_utils.debugger_util import ForkedPdb from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor diff --git a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py similarity index 94% rename from manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py index f26460c..9305f61 100644 --- 
a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_distrib.py @@ -1,6 +1,6 @@ import torch -from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV diff --git a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py similarity index 97% rename from manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index e28097c..0c97a49 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -23,9 +23,9 @@ Builder, ) -from manipulathor_baselines.object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig +from manipulathor_baselines.stretch_object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import ObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID diff --git a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py similarity index 91% rename from manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 88505e6..9c89b46 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -15,14 +15,14 @@ from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from manipulathor_baselines.object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask, ObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor -from 
manipulathor_baselines.object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ +from manipulathor_baselines.stretch_object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ ClipResNetPreprocessNCameraGRUActorCriticMixin from allenact.base_abstractions.preprocessor import Preprocessor from allenact.utils.experiment_utils import Builder diff --git a/manipulathor_baselines/object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py similarity index 56% rename from manipulathor_baselines/object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py index 681ba65..06bc5a8 100644 --- a/manipulathor_baselines/object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py @@ -4,11 +4,14 @@ from utils.stretch_utils.real_stretch_sensors import RealRGBSensorStretchIntel, RealRGBSensorStretchKinect from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from utils.procthor_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from utils.stretch_utils.real_stretch_environment import StretchRealEnvironment +from utils.stretch_utils.all_rooms_object_nav_task_sampler import RealStretchAllRoomsObjectNavTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import RealStretchObjectNavTask -from manipulathor_baselines.object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ ithorObjectNavClipResnet50RGBOnly2CameraWideFOV @@ -21,12 +24,16 @@ class RealStretchObjectNav( height=desired_screen_size, width=desired_screen_size, use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres", ), RealRGBSensorStretchKinect( height=desired_screen_size, width=desired_screen_size, use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm", ), GoalObjectTypeThorSensor( @@ -36,8 +43,9 @@ class RealStretchObjectNav( MAX_STEPS = 200 - TASK_SAMPLER = AllRoomsObjectNavTaskSampler#RealStretchDiverseBringObjectTaskSampler - TASK_TYPE = StretchObjectNavTask#RealStretchExploreWiseRewardTask + TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler #RealStretchDiverseBringObjectTaskSampler + TASK_TYPE = RealStretchObjectNavTask #RealStretchExploreWiseRewardTask + ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init NUM_PROCESSES = 20 diff --git a/manipulathor_baselines/object_nav_baselines/models/clip_objnav_ncamera_model.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py similarity index 100% rename from manipulathor_baselines/object_nav_baselines/models/clip_objnav_ncamera_model.py rename to manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py diff --git 
a/manipulathor_baselines/object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py similarity index 97% rename from manipulathor_baselines/object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py rename to manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py index f2be0b7..14b40d7 100644 --- a/manipulathor_baselines/object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -12,7 +12,7 @@ from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.object_nav_baselines.models.clip_objnav_ncamera_model import \ +from manipulathor_baselines.stretch_object_nav_baselines.models.clip_objnav_ncamera_model import \ ResnetTensorNavNCameraActorCritic from manipulathor_utils.debugger_util import ForkedPdb From 7a5e8a4d826d6c0fb26fae377386b2e46190681c Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 26 May 2022 15:54:24 -0700 Subject: [PATCH 24/53] shuffle and rename task and sampler, and add real stretch versions of same --- .../all_rooms_object_nav_task_sampler.py | 67 +++++++++++++++++++ utils/stretch_utils/real_stretch_sensors.py | 3 +- .../stretch_object_nav_tasks.py} | 57 ++++++++++++++++ 3 files changed, 126 insertions(+), 1 deletion(-) rename utils/{procthor_utils => stretch_utils}/all_rooms_object_nav_task_sampler.py (87%) rename utils/{procthor_utils/procthor_object_nav_tasks.py => stretch_utils/stretch_object_nav_tasks.py} (89%) diff --git a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py similarity index 87% rename from utils/procthor_utils/all_rooms_object_nav_task_sampler.py rename to utils/stretch_utils/all_rooms_object_nav_task_sampler.py index 1aaae8f..d200240 100644 --- a/utils/procthor_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -18,8 +18,14 @@ from scripts.dataset_generation.find_categories_to_use import ROBOTHOR_TRAIN, KITCHEN_TRAIN, KITCHEN_TEST, KITCHEN_VAL from utils.manipulathor_data_loader_utils import get_random_query_image, get_random_query_feature_from_img_adr from scripts.stretch_jupyter_helper import get_reachable_positions, transport_wrapper +from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer +from utils.stretch_utils.real_stretch_environment import StretchRealEnvironment +from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment + + + class AllRoomsObjectNavTaskSampler(TaskSampler): @@ -419,3 +425,64 @@ def get_source_target_indices(self): return data_point + +class RealStretchAllRoomsObjectNavTaskSampler(AllRoomsObjectNavTaskSampler): + + def _create_environment(self, **kwargs) -> ManipulaTHOREnvironment: + env = StretchRealEnvironment( + make_agents_visible=False, + object_open_speed=0.05, + env_args=self.env_args, + ) + return env + + def __init__(self, **kwargs) -> None: + + super().__init__(**kwargs) + + self.all_possible_points = {} + + if self.sampler_mode == "test": + self.max_tasks = self.reset_tasks = 200 + + def next_task( + self, force_advance_scene: bool = False + ) -> Optional[ObjectNavTask]: + + if 
self.max_tasks is not None and self.max_tasks <= 0: + return None + + if self.sampler_mode != "train" and self.length <= 0: + return None + + if self.env is None: + self.env = self._create_environment() + self.env.reset(scene_name='RealRobothor') + + #TODO apples forever. consider branching out someday + target_object = {'object_id': 'Apple|1|1|1', 'object_type':"Apple"} + + task_info = { + 'target_object_ids': [target_object['object_id']], + 'object_type': target_object['object_type'], + 'starting_pose': {}, + 'mode': self.env_args['agentMode'], + 'house_name': 'RealRobothor', + 'scene_name': 'RealRobothor', + 'mirrored': False # yeah no + } + + self._last_sampled_task = self.TASK_TYPE( + env=self.env, + sensors=self.sensors, + task_info=task_info, + max_steps=self.max_steps, + action_space=self._action_space, + visualizers=self.visualizers, + reward_config=self.rewards_config, + distance_type=self.distance_type, + distance_cache=self.distance_cache, + additional_visualize=False # not implemented for non-procthor + ) + + return self._last_sampled_task \ No newline at end of file diff --git a/utils/stretch_utils/real_stretch_sensors.py b/utils/stretch_utils/real_stretch_sensors.py index 1ca3495..6f28d73 100644 --- a/utils/stretch_utils/real_stretch_sensors.py +++ b/utils/stretch_utils/real_stretch_sensors.py @@ -13,7 +13,8 @@ import torch from PIL import Image import matplotlib.pyplot as plt -from allenact.base_abstractions.sensor import DepthSensor, Sensor, RGBSensor +from allenact.base_abstractions.sensor import Sensor #, DepthSensor, RGBSensor + from allenact.base_abstractions.task import Task from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment diff --git a/utils/procthor_utils/procthor_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py similarity index 89% rename from utils/procthor_utils/procthor_object_nav_tasks.py rename to utils/stretch_utils/stretch_object_nav_tasks.py index 33b8b83..76c8517 100644 --- a/utils/procthor_utils/procthor_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -427,7 +427,64 @@ def judge(self) -> float: return float(reward) +class RealStretchObjectNavTask(StretchObjectNavTask): + + + def _step(self, action: int) -> RLStepResult: + + action_str = self.class_action_names()[action] + print('Model Said', action_str) + + self.manual_action = False + + print('Action Called', action_str) + + if action_str == "Done": + self._took_end_action = True + + self._last_action_str = action_str + action_dict = {"action": action_str} + # object_id = self.task_info["source_object_id"] + # if action_str == PICKUP: + # action_dict = {**action_dict, "object_id": object_id} + self.env.step(action_dict) + self.last_action_success = self.env.last_action_success + + last_action_name = self._last_action_str + # last_action_success = float(self.last_action_success) + # self.action_sequence_and_success.append((last_action_name, last_action_success)) + self.visualize(last_action_name) + + step_result = RLStepResult( + observation=self.get_observations(), + reward=self.judge(), + done=self.is_done(), + info={"last_action_success": self.last_action_success}, + ) + return step_result + + + + + def metrics(self) -> Dict[str, Any]: + if self.is_done(): + result={} # placeholder for future + metrics = super().metrics() + metrics["success"] = self._success + result["success"] = self._success + self.finish_visualizer_metrics(result) + 
self.finish_visualizer(self._success) + # self.action_sequence_and_success = [] + self._metrics = metrics + return metrics + else: + return {} + + def judge(self) -> float: + """Compute the reward after having taken a step.""" + reward = 0 + return reward From eca9eebf310f085cbd88e497e1efc34c60b5001a Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 26 May 2022 16:33:19 -0700 Subject: [PATCH 25/53] apparently functional real stretch objectnav --- .../experiments/real_robothor/real_robothor_objectnav.py | 2 +- utils/stretch_utils/all_rooms_object_nav_task_sampler.py | 2 +- utils/stretch_utils/stretch_object_nav_tasks.py | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py index 06bc5a8..6da15ac 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py @@ -48,7 +48,7 @@ class RealStretchObjectNav( ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init NUM_PROCESSES = 20 - + VISUALIZE = False TRAIN_SCENES = ['RealRobothor'] TEST_SCENES = ['RealRobothor'] diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index d200240..b82e1ef 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -480,7 +480,7 @@ def next_task( action_space=self._action_space, visualizers=self.visualizers, reward_config=self.rewards_config, - distance_type=self.distance_type, + distance_type="real_world", distance_cache=self.distance_cache, additional_visualize=False # not implemented for non-procthor ) diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 76c8517..764bccf 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -1,3 +1,4 @@ +from dis import dis import random from typing import Any, Dict, List, Optional, Tuple, Union from typing_extensions import Literal @@ -84,6 +85,8 @@ def __init__( self.dist_to_target_func = self.min_geo_distance_to_target elif distance_type == "l2": self.dist_to_target_func = self.min_l2_distance_to_target + elif distance_type == "real_world": + self.dist_to_target_func = self.dummy_distance_to_target # maybe placeholder here for estimation later else: raise NotImplementedError @@ -147,6 +150,8 @@ def min_geo_distance_to_target(self) -> float: return -1.0 return min_dist + def dummy_distance_to_target(self) -> float: + return float("inf") def start_visualize(self): for visualizer in self.visualizers: @@ -428,6 +433,9 @@ def judge(self) -> float: class RealStretchObjectNavTask(StretchObjectNavTask): + + # def __init__(self, **kwargs) -> None: + # super().__init__(self,distance_type = "real_world", **kwargs) def _step(self, action: int) -> RLStepResult: From bd5fb68fbb98c0641c531e1721713f9d505fa77d Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 26 May 2022 17:25:01 -0700 Subject: [PATCH 26/53] working robothor + visualizer --- .../ithor/obj_nav_2camera_ithor_wide.py | 2 +- .../real_robothor/real_robothor_objectnav.py | 21 +++++++++++++- .../stretch_utils/stretch_object_nav_tasks.py | 28 
+------------------ utils/stretch_utils/stretch_visualizer.py | 23 ++++----------- 4 files changed, 27 insertions(+), 47 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index 8f6e50d..91ab5a6 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -87,7 +87,7 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( MAX_STEPS = 500 if platform.system() == "Darwin": MAX_STEPS = 100 - SENSORS += [ + SENSORS += [ #TODO FIX ORDER HERE RGBSensorStretchKinectBigFov( height=ObjectNavBaseConfig.SCREEN_SIZE, width=ObjectNavBaseConfig.SCREEN_SIZE, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py index 6da15ac..19588d1 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py @@ -41,6 +41,26 @@ class RealStretchObjectNav( ), ] + # this should only ever be run on darwin anyway - sensors for visualization + SENSORS += [ + RealRGBSensorStretchIntel( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_only_viz", + ), + RealRGBSensorStretchKinect( + height=desired_screen_size, + width=desired_screen_size, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_arm_only_viz", + ), + ] + MAX_STEPS = 200 TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler #RealStretchDiverseBringObjectTaskSampler @@ -48,7 +68,6 @@ class RealStretchObjectNav( ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init NUM_PROCESSES = 20 - VISUALIZE = False TRAIN_SCENES = ['RealRobothor'] TEST_SCENES = ['RealRobothor'] diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 764bccf..68dff0d 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -433,10 +433,6 @@ def judge(self) -> float: class RealStretchObjectNavTask(StretchObjectNavTask): - - # def __init__(self, **kwargs) -> None: - # super().__init__(self,distance_type = "real_world", **kwargs) - def _step(self, action: int) -> RLStepResult: @@ -449,18 +445,14 @@ def _step(self, action: int) -> RLStepResult: if action_str == "Done": self._took_end_action = True + # TODO do something here - set trace in env to get time to reset room? 
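One way the TODO above could be resolved, in line with the operator-in-the-loop pattern a later patch in this series adopts for judging success, is to drop into a ForkedPdb trace whenever the model emits Done, giving a human time to score the episode and reset the room. The sketch below is illustrative only: confirm_episode_with_operator and OperatorVerdict are made-up names, and ForkedPdb is the debugger helper this repo already imports elsewhere.

    from manipulathor_utils.debugger_util import ForkedPdb

    class OperatorVerdict:
        """Tiny holder the operator can edit from inside the trace."""
        success = False

    def confirm_episode_with_operator(object_type: str) -> bool:
        # Pause the real-robot rollout: the operator records the outcome and
        # physically resets the room before the next episode starts.
        verdict = OperatorVerdict()
        print('I think I found a', object_type)
        print('Set verdict.success = True/False in the trace, then continue.')
        ForkedPdb().set_trace()
        return verdict.success
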
self._last_action_str = action_str action_dict = {"action": action_str} - # object_id = self.task_info["source_object_id"] - # if action_str == PICKUP: - # action_dict = {**action_dict, "object_id": object_id} self.env.step(action_dict) self.last_action_success = self.env.last_action_success last_action_name = self._last_action_str - # last_action_success = float(self.last_action_success) - # self.action_sequence_and_success.append((last_action_name, last_action_success)) self.visualize(last_action_name) step_result = RLStepResult( @@ -471,24 +463,6 @@ def _step(self, action: int) -> RLStepResult: ) return step_result - - - - def metrics(self) -> Dict[str, Any]: - if self.is_done(): - result={} # placeholder for future - metrics = super().metrics() - metrics["success"] = self._success - result["success"] = self._success - self.finish_visualizer_metrics(result) - self.finish_visualizer(self._success) - # self.action_sequence_and_success = [] - - self._metrics = metrics - return metrics - else: - return {} - def judge(self) -> float: """Compute the reward after having taken a step.""" reward = 0 diff --git a/utils/stretch_utils/stretch_visualizer.py b/utils/stretch_utils/stretch_visualizer.py index d8c3cf4..a56f23a 100644 --- a/utils/stretch_utils/stretch_visualizer.py +++ b/utils/stretch_utils/stretch_visualizer.py @@ -183,24 +183,17 @@ def finish_episode(self, environment, episode_info, task_info): source_object_id = task_info["target_object_ids"][0] episode_success = episode_info._success - # Put back if you want the images - # for i, img in enumerate(self.log_queue): - # image_dir = os.path.join(self.log_dir, time_to_write + '_seq{}.png'.format(str(i))) - # cv2.imwrite(image_dir, img[:,:,[2,1,0]]) - episode_success_offset = "succ" if episode_success else "fail" source_obj_type = task_info['object_type'] if source_obj_type == 'small': source_obj_type = task_info['object_type'] - # goal_obj_type = task_info['goal_object_type'] room_name = task_info['scene_name'] gif_name = ( f"{time_to_write}_room_{room_name}_to_{source_obj_type}_episode_{episode_success_offset}.gif" ) - # ForkedPdb().set_trace() self.log_queue = put_action_on_image(self.log_queue, self.action_queue[1:]) addition_texts = ['xxx'] + [str(x) for x in episode_info.agent_body_dist_to_obj] if not addition_texts == ['xxx']: @@ -209,18 +202,12 @@ def finish_episode(self, environment, episode_info, task_info): concat_all_images = np.expand_dims(np.stack(self.log_queue, axis=0), axis=1) save_image_list_to_gif(concat_all_images, gif_name, self.log_dir) this_controller = environment.controller - scene = this_controller.last_event.metadata[ - "sceneName" - ] - reset_environment_and_additional_commands(this_controller, scene) - - additional_observation_start = [] - additional_observation_goal = [] - if 'target_object_mask' in episode_info.get_observations(): - additional_observation_start.append('target_object_mask') - if 'target_location_mask' in episode_info.get_observations(): - additional_observation_goal.append('target_location_mask') + if room_name == 'RealRobothor': + scene = None # reset doesn't do anything + else: + scene = this_controller.last_event.metadata["sceneName"] + reset_environment_and_additional_commands(this_controller, scene) self.log_queue = [] self.action_queue = [] From a1ffbe9ec77526ecc657844a11196d9306b4dbcf Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 31 May 2022 11:00:07 -0700 Subject: [PATCH 27/53] real robothor eval fixes + conveniences --- .../procthor/obj_nav_for_procthor.py | 3 +- 
.../real_robothor/real_robothor_objectnav.py | 2 +- .../all_rooms_object_nav_task_sampler.py | 26 +++++++++++++-- .../stretch_utils/stretch_object_nav_tasks.py | 33 ++++++++++++++++--- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index 0c97a49..56d2fb1 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -136,6 +136,7 @@ def _get_sampler_args_for_scene_split( out["env_args"]["x_display"] = x_display out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID out["env_args"]['scene'] = 'Procedural' + out["env_args"]["branch"] = "nanna" if allow_flipping is not None: out["env_args"]['allow_flipping'] = allow_flipping @@ -147,6 +148,7 @@ def train_task_sampler_args( **kwargs ) -> Dict[str, Any]: train_houses = self.HOUSE_DATASET["train"] + if self.NUM_TRAIN_HOUSES: train_houses = train_houses.select(range(self.NUM_TRAIN_HOUSES)) @@ -172,7 +174,6 @@ def valid_task_sampler_args(self, **kwargs) -> Dict[str, Any]: def test_task_sampler_args(self, **kwargs) -> Dict[str, Any]: if self.TEST_ON_VALIDATION: return self.valid_task_sampler_args(**kwargs) - test_houses = self.HOUSE_DATASET["test"] return self._get_sampler_args_for_scene_split( houses=test_houses.select(range(100)), diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py index 19588d1..b1e9211 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py @@ -61,7 +61,7 @@ class RealStretchObjectNav( ), ] - MAX_STEPS = 200 + MAX_STEPS = 60 TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler #RealStretchDiverseBringObjectTaskSampler TASK_TYPE = RealStretchObjectNavTask #RealStretchExploreWiseRewardTask diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index b82e1ef..e7c3cac 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -442,6 +442,25 @@ def __init__(self, **kwargs) -> None: self.all_possible_points = {} + # This can be done more elegantly from datasets/objects/robothor_habitat2022.yaml and exclude toilet + self.possible_real_objects = [ + {'object_id': 'AlarmClock|1|1|1', 'object_type':"AlarmClock"}, + {'object_id': 'Apple|1|1|1', 'object_type':"Apple"}, + {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"}, + {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"}, + {'object_id': 'Bed|1|1|1', 'object_type':"Bed"}, + {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"}, + {'object_id': 'Chair|1|1|1', 'object_type':"Chair"}, + {'object_id': 'GarbageCan|1|1|1', 'object_type':"GarbageCan"}, + {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"}, + {'object_id': 'Laptop|1|1|1', 'object_type':"Laptop"}, + {'object_id': 'Mug|1|1|1', 'object_type':"Mug"}, + {'object_id': 'Sofa|1|1|1', 'object_type':"Sofa"}, + {'object_id': 'SprayBottle|1|1|1', 
'object_type':"SprayBottle"}, + {'object_id': 'Television|1|1|1', 'object_type':"Television"}, + {'object_id': 'Vase|1|1|1', 'object_type':"Vase"} + ] + if self.sampler_mode == "test": self.max_tasks = self.reset_tasks = 200 @@ -459,8 +478,11 @@ def next_task( self.env = self._create_environment() self.env.reset(scene_name='RealRobothor') - #TODO apples forever. consider branching out someday - target_object = {'object_id': 'Apple|1|1|1', 'object_type':"Apple"} + skip_object = True + while skip_object: + target_object = random.choice(self.possible_real_objects) + print('I am now seeking a ', target_object['object_type'], '. Continue by setting skip_object=False') + ForkedPdb().set_trace() task_info = { 'target_object_ids': [target_object['object_id']], diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 68dff0d..15e2796 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -1,9 +1,11 @@ +from ast import For from dis import dis import random from typing import Any, Dict, List, Optional, Tuple, Union from typing_extensions import Literal import gym +from importlib_metadata import Lookup import numpy as np import torch from allenact.base_abstractions.misc import RLStepResult @@ -124,7 +126,7 @@ def min_l2_distance_to_target(self) -> float: if min_dist == float("inf"): get_logger().error( f"No target object {self.task_info['object_type']} found" - f" in house {self.task_info['house_name']}." + f" in house {self.task_info['scene_name']}." ) return -1.0 return min_dist @@ -140,7 +142,7 @@ def min_geo_distance_to_target(self) -> float: env=self.env, distance_cache=self.distance_cache, object_id=object_id, - house_name=self.task_info["house_name"], + house_name=self.task_info["scene_name"], ) if (min_dist is None and geo_dist >= 0) or ( geo_dist >= 0 and geo_dist < min_dist @@ -393,6 +395,24 @@ class StretchObjectNavTask(ObjectNavTask): DONE, ) + # TODO: change this to find the object via segementation or some other way - account for FOV cropping + # def _is_goal_in_range(self) -> bool: + # candidates = [obj + # for obj in self.env.last_event.metadata["objects"] + # if obj["visible"] and obj["objectType"] == self.task_info["object_type"]] + # return any(candidates) + +class StretchNeckedObjectNavTask(ObjectNavTask): + _actions = ( + MOVE_AHEAD, + MOVE_BACK, + ROTATE_RIGHT, + ROTATE_LEFT, + "LookUp", + "LookDown", + DONE, + ) + class ExploreWiseObjectNavTask(ObjectNavTask): def __init__(self, **kwargs): super().__init__(**kwargs) @@ -437,15 +457,18 @@ class RealStretchObjectNavTask(StretchObjectNavTask): def _step(self, action: int) -> RLStepResult: action_str = self.class_action_names()[action] - print('Model Said', action_str) + print('Model Said', action_str, ' as action ', str(self.num_steps_taken())) self.manual_action = False - print('Action Called', action_str) + # print('Action Called', action_str) if action_str == "Done": self._took_end_action = True - # TODO do something here - set trace in env to get time to reset room? + print('I think I found a ', self.task_info['object_type']) + # self._success = bool(int(input('Was I correct? 0 for no, 1 for yes: '))) + print('Was I correct? 
Set self._success in trace.') + ForkedPdb().set_trace() self._last_action_str = action_str action_dict = {"action": action_str} From bdcea11ea3c2da639073ee3a7047d2db84734214 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 31 May 2022 11:14:25 -0700 Subject: [PATCH 28/53] more trying to debug --- ...nav_for_procthor_clip_resnet50_rgb_only.py | 6 +- .../test_robothor_procthorstyle.py | 73 ++ .../procthor_object_nav_task_samplers.py | 100 ++- utils/stretch_utils/real_stretch_sensors.py | 749 +++++++++--------- .../stretch_ithor_arm_environment.py | 16 +- utils/stretch_utils/stretch_visualizer.py | 2 +- 6 files changed, 555 insertions(+), 391 deletions(-) create mode 100644 manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/test_robothor_procthorstyle.py diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 9c89b46..a3f44f5 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -17,7 +17,7 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb @@ -70,9 +70,11 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 40 TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask + TASK_TYPE = StretchNeckedObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment + # NUM_TRAIN_HOUSES = 500 + CLIP_MODEL_TYPE = "RN50" diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/test_robothor_procthorstyle.py new file mode 100644 index 0000000..3b93d5e --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/test_robothor_procthorstyle.py @@ -0,0 +1,73 @@ +import datasets +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV +from utils.procthor_utils.procthor_object_nav_task_samplers import RoboThorObjectNavTestTaskSampler +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask + +from manipulathor_utils.debugger_util import ForkedPdb + + + +class ObjectNavRoboTHORConfig(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): + EVAL_TASKS = datasets.load_dataset( + f"allenai/robothor-objectnav-eval", use_auth_token=True + ) + + TASK_SAMPLER = RoboThorObjectNavTestTaskSampler + TASK_TYPE = StretchObjectNavTask + TEST_ON_VALIDATION = True + + @classmethod + 
def tag(cls): + return super().tag() + "-RoboTHOR-Target" + + # def make_sampler_fn(self, task_sampler_args: TaskSamplerArgs, **kwargs): + # if task_sampler_args.mode == "eval": + # return ObjectNavTestTaskSampler(args=task_sampler_args) + # else: + # return ObjectNavTaskSampler(args=task_sampler_args) + + @classmethod + def make_sampler_fn(cls, **kwargs): + from datetime import datetime + + now = datetime.now() + + exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f") + if cls.VISUALIZE: + visualizers = [ + viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS + ] + kwargs["visualizers"] = visualizers + kwargs["exp_name"] = exp_name_w_time + # ForkedPdb().set_trace() + + assert kwargs["sampler_mode"] == "eval" + return cls.TASK_SAMPLER(**kwargs) + + def valid_task_sampler_args(self, **kwargs): + out = self._get_sampler_args_for_scene_split( + houses=self.EVAL_TASKS["validation"], + mode="eval", + max_tasks=150, + allow_flipping=False, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored + **kwargs, + ) + return out + + def test_task_sampler_args(self, **kwargs): + if self.TEST_ON_VALIDATION: + return self.valid_task_sampler_args(**kwargs) + + out = self._get_sampler_args_for_scene_split( + houses=self.EVAL_TASKS["test"].shuffle(), + mode="eval", + max_tasks=150, + allow_flipping=False, + resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored + **kwargs, + ) + return out \ No newline at end of file diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index fd67c9b..a57a277 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -1,8 +1,4 @@ -import glob -from optparse import Option -from attrs import define -from typing import Any, Dict, List, Optional, Sequence -from typing_extensions import Literal +from typing import Any, Dict, List, Optional import platform import pickle import random @@ -23,7 +19,7 @@ from utils.procthor_utils.procthor_types import AgentPose, Vector3 -from utils.procthor_utils.procthor_object_nav_tasks import StretchObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask from utils.stretch_utils.stretch_constants import ADITIONAL_ARM_ARGS from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment from scripts.stretch_jupyter_helper import make_all_objects_unbreakable @@ -102,6 +98,8 @@ def __init__( self.max_agent_positions = 6 self.p_greedy_target_object = 0.8 + self.last_house_idx = None + self.reset() @@ -415,3 +413,93 @@ def reset(self): self.episode_index = 0 self.max_tasks = self.reset_tasks self.house_inds_index = 0 + + + +class RoboThorObjectNavTestTaskSampler(ProcTHORObjectNavTaskSampler): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.last_scene = None + + # # visualize 1/10 episodes + # self.epids_to_visualize = set( + # np.linspace( + # 0, self.reset_tasks, num=min(self.reset_tasks // 10, 4), dtype=np.uint8 + # ).tolist() + # ) + # self.args.controller_args = self.args.controller_args.copy() + # self.env_args["procedural"] = False + + def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]: + while True: + # NOTE: Stopping condition + if self.env is None: + self.env = self._create_environment() + + # NOTE: Stopping condition + if self.max_tasks <= 0: + return None + + + epidx = 
self.house_inds[self.max_tasks - 1] + ep = self.houses[epidx] + # ForkedPdb().set_trace() + + if self.last_house_idx is None or self.last_house_idx != ep["scene"]: + self.last_scene = ep["scene"] + self.env.reset(ep["scene"]) + + # NOTE: not using ep["targetObjectIds"] due to floating points with + # target objects moving. + event = self.env.controller.step(action="ResetObjectFilter") + target_object_ids = [ + obj["objectId"] + for obj in event.metadata["objects"] + if obj["objectType"] == ep["targetObjectType"] + ] + self.env.controller.step( + action="SetObjectFilter", + objectIds=target_object_ids, + raise_for_failure=True, + ) + ep["agentPose"]["horizon"] = 0 # reset for stretch agent + ep["agentPose"]["standing"] = True + event = self.env.controller.step(action="TeleportFull", **ep["agentPose"]) + if not event: + # NOTE: Skip scenes where TeleportFull fails. + # This is added from a bug in the RoboTHOR eval dataset. + get_logger().error( + f"Teleport failing {event.metadata['actionReturn']} in {epidx}." + ) + self.max_tasks -= 1 + self.episode_index += 1 + continue + + difficulty = {"difficulty": ep["difficulty"]} if "difficulty" in ep else {} + self._last_sampled_task = self.TASK_TYPE( + # visualize=self.episode_index in self.epids_to_visualize, + env=self.env, + sensors=self.sensors, + max_steps=self.max_steps, + reward_config=self.rewards_config, + distance_type=self.distance_type, + distance_cache=self.distance_cache, + visualizers=self.visualizers, + task_info={ + "mode": self.env_args['agentMode'], + "scene_name": ep["scene"], + "target_object_ids": target_object_ids, + "object_type": ep["targetObjectType"], + "starting_pose": ep["agentPose"], + "mirrored": False, + "id": f"{ep['scene']}__global{epidx}__{ep['targetObjectType']}", + **difficulty, + }, + ) + + self.max_tasks -= 1 + self.episode_index += 1 + + return self._last_sampled_task + + diff --git a/utils/stretch_utils/real_stretch_sensors.py b/utils/stretch_utils/real_stretch_sensors.py index 6f28d73..25339f6 100644 --- a/utils/stretch_utils/real_stretch_sensors.py +++ b/utils/stretch_utils/real_stretch_sensors.py @@ -19,11 +19,11 @@ from allenact.utils.misc_utils import prepare_locals_for_super from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog -from detectron2.engine import DefaultPredictor -from detectron2.model_zoo import model_zoo -from detectron2.utils.visualizer import Visualizer +# from detectron2.config import get_cfg +# from detectron2.data import MetadataCatalog +# from detectron2.engine import DefaultPredictor +# from detectron2.model_zoo import model_zoo +# from detectron2.utils.visualizer import Visualizer import matplotlib.pyplot as plt from torch.distributions.utils import lazy_property @@ -39,7 +39,8 @@ from utils.calculation_utils import calc_world_coordinates from utils.detection_translator_util import THOR2COCO from utils.noise_in_motion_util import squeeze_bool_mask -from utils.real_stretch_utils import get_binary_mask_of_arm, get_mid_point_of_object_from_depth_and_mask + +# from utils.real_stretch_utils import get_binary_mask_of_arm, get_mid_point_of_object_from_depth_and_mask from utils.stretch_utils.stretch_constants import INTEL_RESIZED_H, INTEL_RESIZED_W, KINECT_REAL_W, KINECT_REAL_H, \ MAX_INTEL_DEPTH, MIN_INTEL_DEPTH, MAX_KINECT_DEPTH, MIN_KINECT_DEPTH, INTEL_FOV_W, INTEL_FOV_H, KINECT_FOV_W, \ KINECT_FOV_H @@ -130,376 
+131,376 @@ def normalize_real_kinect_image(frame,size=224): result[result < MIN_KINECT_DEPTH] = 0 return result.astype(frame.dtype) -class StretchDetectronObjectMask(Sensor): - def __init__(self, type: str,noise, source_camera, uuid: str = "object_mask", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - uuid = '{}_{}'.format(uuid, type) - self.noise = noise - self.cache = {'object_name': '', 'image':None, 'mask':None} - super().__init__(**prepare_locals_for_super(locals())) - self.source_camera = source_camera - - @lazy_property - def predictor(self): - self.cfg = get_cfg() - if platform.system() == "Darwin": - self.cfg.MODEL.DEVICE = "cpu" - # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library - self.cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # is this the model we want? Yes it is a pretty good model - self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 # set threshold for this model #TODO good number? - # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well - self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") - # self.predictor = DefaultPredictor(self.cfg) - self.class_labels = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).thing_classes - return DefaultPredictor(self.cfg) +# class StretchDetectronObjectMask(Sensor): +# def __init__(self, type: str,noise, source_camera, uuid: str = "object_mask", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# uuid = '{}_{}'.format(uuid, type) +# self.noise = noise +# self.cache = {'object_name': '', 'image':None, 'mask':None} +# super().__init__(**prepare_locals_for_super(locals())) +# self.source_camera = source_camera + +# @lazy_property +# def predictor(self): +# self.cfg = get_cfg() +# if platform.system() == "Darwin": +# self.cfg.MODEL.DEVICE = "cpu" +# # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library +# self.cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")) # is this the model we want? Yes it is a pretty good model +# self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 # set threshold for this model #TODO good number? +# # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... 
url as well +# self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") +# # self.predictor = DefaultPredictor(self.cfg) +# self.class_labels = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]).thing_classes +# return DefaultPredictor(self.cfg) - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - - if self.type == 'source': - info_to_search = 'source_object_id' - elif self.type == 'destination': - info_to_search = 'goal_object_id' - #TODO remove - mask = torch.zeros((224,224,1)) - return mask - - original_im = self.source_camera.get_observation(env, task, *args, **kwargs) - - if info_to_search == self.cache['object_name']: - if np.all(self.cache['image'] == original_im): - return self.cache['mask'] - else: - self.cache = {} - - - # TODO VERYYYYYY IMPORTANT - im = original_im[:,:,::-1] - # the detection requires BGR??? - - outputs = self.predictor(im * 255.) - - # remove - # outputs = self.predictor(im * 255.) - # cv2.imwrite('my_image.png',im * 255.) - - # - def visualize_detections(im, outputs ): - v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2) - out = v.draw_instance_predictions(outputs["instances"].to("cpu")) - dir = 'experiment_output/visualization_predictions' - timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f.png") - os.makedirs(dir, exist_ok=True) - print('saving all detections in ', os.path.join(dir, timestamp)) - plt.imsave(os.path.join(dir, timestamp), out.get_image()[:, :, ::-1]) - - - - category = task.task_info[info_to_search].split('|')[0] - assert category in THOR2COCO - class_ind_to_look_for = self.class_labels.index(THOR2COCO[category]) - all_predicted_labels = outputs['instances'].pred_classes - all_predicted_bbox = outputs['instances'].pred_boxes #TODO switch to segmentation - - - - mask = torch.zeros((im.shape[0], im.shape[1])) - valid_boxes = [all_predicted_bbox[i] for i in range(len(all_predicted_labels)) if all_predicted_labels[i] == class_ind_to_look_for] - - for box in valid_boxes: - x1, y1, x2, y2 = [int(x) for x in box.tensor.squeeze()] - mask[y1:y2, x1:x2] = 1 - mask = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0), size=(224, 224)).squeeze(0).squeeze(0) - - #TODO save all the masks so we don't pass the image through detectyion multiple timesact - - # print('camera', self.source_camera, 'type', self.type, 'category', category) - # print('observed objects', [self.class_labels[x] for x in all_predicted_labels]) - - #TODO this can be optimized because we are passing this image twice and no point - - # use these later for visualization purposes? 
- - # plt.imsave('something.png', im) - mask = mask.unsqueeze(-1) - self.cache = {'object_name': info_to_search, 'image':original_im, 'mask':mask} - - return mask - - -class StretchObjectMask(Sensor): - def __init__(self, type: str,noise, uuid: str = "object_mask", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - uuid = '{}_{}'.format(uuid, type) - self.noise = noise - self.cache = None - super().__init__(**prepare_locals_for_super(locals())) - - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - if env.last_image_changed is False and self.cache is not None: - return self.cache - - # TODO remove - # mask = np.zeros((224, 224, 1)) - # return mask - - - img = env.controller.last_event.frame - resized_image = cv2.resize(img, dsize=(224,224)) - global input_received, center_x, center_y - center_x, center_y = -1, -1 - input_received = False - - def normalize_number(x, w): - if x < 0: - x = 0 - if x > w: - x = w - return x - - def clear_plot(): - fig.clear(); fig.clf(); plt.clf(); ax.cla(); fig.clear(True) - def close_plot(): - clear_plot() - plt.close(fig) - plt.close('all') - def onclick(event): - global center_x, center_y, input_received - if event.button == 3: #Right click - center_x = -1 - center_y = -1 - input_received = True - close_plot() - if event.button == 1: #Left click - center_x = event.xdata - center_y = event.ydata - input_received = True - close_plot() - - import matplotlib - matplotlib.use('MacOSX') - fig, ax = plt.subplots() - # clear_plot() - ax.imshow(resized_image) - cid = fig.canvas.mpl_connect('button_press_event', onclick) - plt.show() - fig.canvas.mpl_disconnect(cid) - close_plot() - - - self.window_size = 20 #TODO do I want to change the size of this one maybe? - mask = np.zeros((224, 224, 1)) - if center_y == -1 and center_x == -1: - mask[:,:] = 0. - else: - offset = self.window_size / 2 - object_boundaries = center_x - offset, center_y - offset, center_x + offset, center_y + offset - x1, y1, x2, y2 = [int(normalize_number(i, 224)) for i in object_boundaries] - mask[y1:y2, x1:x2] = 1. - self.cache = mask - return mask - - - -class RealStretchPickedUpObjSensor(Sensor): - def __init__(self, uuid: str = "pickedup_object", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - super().__init__(**prepare_locals_for_super(locals())) - - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - return False - -class RealKinectArmPointNavEmulSensor(Sensor): - - def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, arm_mask_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - self.mask_sensor = mask_sensor - self.depth_sensor = depth_sensor - self.arm_mask_sensor = arm_mask_sensor - uuid = '{}_{}'.format(uuid, type) - - self.min_xyz = np.zeros((3)) - - self.dummy_answer = torch.zeros(3) - self.dummy_answer[:] = 4 # is this good enough? 
- self.device = torch.device("cpu") - super().__init__(**prepare_locals_for_super(locals())) - def get_camera_int_ext(self,env): - #TODO all these values need to be checked - fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think - agent_state = env.controller.last_event.metadata['agent'] - camera_horizon = 45 - camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) - camera_rotation = (agent_state['rotation']['y'] + 90) % 360 - return fov, camera_horizon, camera_xyz, camera_rotation - def get_observation( - self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - #TODO remove - if self.type == 'destination': - return self.dummy_answer - mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times? - depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) - - if task.num_steps_taken() == 0: - self.pointnav_history_aggr = [] - self.real_prev_location = None - self.belief_prev_location = None - - - fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) - arm_world_coord = None - - if mask.sum() != 0: - - midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) - if not torch.any(torch.isnan(midpoint_agent_coord) + torch.isinf(midpoint_agent_coord)): - self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) - - arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice - if arm_mask.sum() == 0: #Do we want to do some approximations or no? - arm_world_coord = None #TODO approax for this - else: - arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) - # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera - # result = distance_in_agent_coord.cpu() - # if arm_mask.sum() != 0 or mask.sum() != 0: - # - # import cv2 - # cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1]) - # cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255) - # cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255) - result = self.history_aggregation(camera_xyz, camera_rotation, arm_world_coord, task.num_steps_taken()) - return result - def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number): - if len(self.pointnav_history_aggr) == 0 or arm_world_coord is None: - return self.dummy_answer - else: - weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_weights = sum(weights) - total_sum = [mid * (1. 
/ (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_sum = sum(total_sum) - midpoint = total_sum / total_weights - agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) - midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) - midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) - midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) - - arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0)) - arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) - arm_state_agent_coord = torch.Tensor([arm_state_agent_coord['position'][k] for k in ['x','y','z']]) - - distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord - - return distance_in_agent_coord.cpu() - - -class KinectArmMaskSensor(Sensor): - def __init__(self, uuid: str = "arm_mask_kinect", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - super().__init__(**prepare_locals_for_super(locals())) - - def get_observation( - self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - kinect_raw_value = env.kinect_raw_frame - mask = get_binary_mask_of_arm(kinect_raw_value) - mask = normalize_real_kinect_image(mask) - return np.expand_dims(mask, axis=-1) - -class RealIntelAgentBodyPointNavEmulSensor(Sensor): - - def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): - observation_space = gym.spaces.Box( - low=0, high=1, shape=(1,), dtype=np.float32 - ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) - self.type = type - self.mask_sensor = mask_sensor - self.depth_sensor = depth_sensor - uuid = '{}_{}'.format(uuid, type) - - self.min_xyz = np.zeros((3)) - self.dummy_answer = torch.zeros(3) - self.dummy_answer[:] = 4 # is this good enough? - self.device = torch.device("cpu") - - - super().__init__(**prepare_locals_for_super(locals())) - def get_camera_int_ext(self,env): - #TODO all these values need to be checked - fov=max(INTEL_FOV_W, INTEL_FOV_H) #TODO are you sure? 
it should be smaller one I think - agent_state = env.controller.last_event.metadata['agent'] - camera_horizon = 0 - camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) - camera_rotation = agent_state['rotation']['y'] - return fov, camera_horizon, camera_xyz, camera_rotation - - - - - def get_observation( - self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any - ) -> Any: - - mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) - - if task.num_steps_taken() == 0: - self.pointnav_history_aggr = [] - self.real_prev_location = None - self.belief_prev_location = None - - - fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) - - - if mask.sum() != 0: - depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) - middle_of_object = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) - self.pointnav_history_aggr.append((middle_of_object.cpu(), 1, task.num_steps_taken())) - - # result = middle_of_object.cpu() - # else: - # result = self.dummy_answer - result = self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken()) - return result - def history_aggregation(self, camera_xyz, camera_rotation, current_step_number): - if len(self.pointnav_history_aggr) == 0: - return self.dummy_answer - else: - weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_weights = sum(weights) - total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] - total_sum = sum(total_sum) - midpoint = total_sum / total_weights - agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) - midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) - midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) - - distance_in_agent_coord = dict(x=midpoint_agent_coord['position']['x'], y=midpoint_agent_coord['position']['y'], z=midpoint_agent_coord['position']['z']) - - agent_centric_middle_of_object = torch.Tensor([distance_in_agent_coord['x'], distance_in_agent_coord['y'], distance_in_agent_coord['z']]) - - agent_centric_middle_of_object = agent_centric_middle_of_object - return agent_centric_middle_of_object +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: + +# if self.type == 'source': +# info_to_search = 'source_object_id' +# elif self.type == 'destination': +# info_to_search = 'goal_object_id' +# #TODO remove +# mask = torch.zeros((224,224,1)) +# return mask + +# original_im = self.source_camera.get_observation(env, task, *args, **kwargs) + +# if info_to_search == self.cache['object_name']: +# if np.all(self.cache['image'] == original_im): +# return self.cache['mask'] +# else: +# self.cache = {} + + +# # TODO VERYYYYYY IMPORTANT +# im = original_im[:,:,::-1] +# # the detection requires BGR??? + +# outputs = self.predictor(im * 255.) + +# # remove +# # outputs = self.predictor(im * 255.) +# # cv2.imwrite('my_image.png',im * 255.) 
+ +# # +# def visualize_detections(im, outputs ): +# v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2) +# out = v.draw_instance_predictions(outputs["instances"].to("cpu")) +# dir = 'experiment_output/visualization_predictions' +# timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f.png") +# os.makedirs(dir, exist_ok=True) +# print('saving all detections in ', os.path.join(dir, timestamp)) +# plt.imsave(os.path.join(dir, timestamp), out.get_image()[:, :, ::-1]) + + + +# category = task.task_info[info_to_search].split('|')[0] +# assert category in THOR2COCO +# class_ind_to_look_for = self.class_labels.index(THOR2COCO[category]) +# all_predicted_labels = outputs['instances'].pred_classes +# all_predicted_bbox = outputs['instances'].pred_boxes #TODO switch to segmentation + + + +# mask = torch.zeros((im.shape[0], im.shape[1])) +# valid_boxes = [all_predicted_bbox[i] for i in range(len(all_predicted_labels)) if all_predicted_labels[i] == class_ind_to_look_for] + +# for box in valid_boxes: +# x1, y1, x2, y2 = [int(x) for x in box.tensor.squeeze()] +# mask[y1:y2, x1:x2] = 1 +# mask = torch.nn.functional.interpolate(mask.unsqueeze(0).unsqueeze(0), size=(224, 224)).squeeze(0).squeeze(0) + +# #TODO save all the masks so we don't pass the image through detectyion multiple timesact + +# # print('camera', self.source_camera, 'type', self.type, 'category', category) +# # print('observed objects', [self.class_labels[x] for x in all_predicted_labels]) + +# #TODO this can be optimized because we are passing this image twice and no point + +# # use these later for visualization purposes? + +# # plt.imsave('something.png', im) +# mask = mask.unsqueeze(-1) +# self.cache = {'object_name': info_to_search, 'image':original_im, 'mask':mask} + +# return mask + + +# class StretchObjectMask(Sensor): +# def __init__(self, type: str,noise, uuid: str = "object_mask", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# uuid = '{}_{}'.format(uuid, type) +# self.noise = noise +# self.cache = None +# super().__init__(**prepare_locals_for_super(locals())) + +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# if env.last_image_changed is False and self.cache is not None: +# return self.cache + +# # TODO remove +# # mask = np.zeros((224, 224, 1)) +# # return mask + + +# img = env.controller.last_event.frame +# resized_image = cv2.resize(img, dsize=(224,224)) +# global input_received, center_x, center_y +# center_x, center_y = -1, -1 +# input_received = False + +# def normalize_number(x, w): +# if x < 0: +# x = 0 +# if x > w: +# x = w +# return x + +# def clear_plot(): +# fig.clear(); fig.clf(); plt.clf(); ax.cla(); fig.clear(True) +# def close_plot(): +# clear_plot() +# plt.close(fig) +# plt.close('all') +# def onclick(event): +# global center_x, center_y, input_received +# if event.button == 3: #Right click +# center_x = -1 +# center_y = -1 +# input_received = True +# close_plot() +# if event.button == 1: #Left click +# center_x = event.xdata +# center_y = event.ydata +# input_received = True +# close_plot() + +# import matplotlib +# matplotlib.use('MacOSX') +# fig, ax = plt.subplots() +# # clear_plot() +# ax.imshow(resized_image) +# cid = fig.canvas.mpl_connect('button_press_event', onclick) +# plt.show() +# fig.canvas.mpl_disconnect(cid) +# close_plot() + + +# 
self.window_size = 20 #TODO do I want to change the size of this one maybe? +# mask = np.zeros((224, 224, 1)) +# if center_y == -1 and center_x == -1: +# mask[:,:] = 0. +# else: +# offset = self.window_size / 2 +# object_boundaries = center_x - offset, center_y - offset, center_x + offset, center_y + offset +# x1, y1, x2, y2 = [int(normalize_number(i, 224)) for i in object_boundaries] +# mask[y1:y2, x1:x2] = 1. +# self.cache = mask +# return mask + + + +# class RealStretchPickedUpObjSensor(Sensor): +# def __init__(self, uuid: str = "pickedup_object", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# super().__init__(**prepare_locals_for_super(locals())) + +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# return False + +# class RealKinectArmPointNavEmulSensor(Sensor): + +# def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, arm_mask_sensor:Sensor, uuid: str = "arm_point_nav_emul", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# self.mask_sensor = mask_sensor +# self.depth_sensor = depth_sensor +# self.arm_mask_sensor = arm_mask_sensor +# uuid = '{}_{}'.format(uuid, type) + +# self.min_xyz = np.zeros((3)) + +# self.dummy_answer = torch.zeros(3) +# self.dummy_answer[:] = 4 # is this good enough? +# self.device = torch.device("cpu") +# super().__init__(**prepare_locals_for_super(locals())) +# def get_camera_int_ext(self,env): +# #TODO all these values need to be checked +# fov=max(KINECT_FOV_W, KINECT_FOV_H)#TODO are you sure? it should be smaller one I think +# agent_state = env.controller.last_event.metadata['agent'] +# camera_horizon = 45 +# camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) +# camera_rotation = (agent_state['rotation']['y'] + 90) % 360 +# return fov, camera_horizon, camera_xyz, camera_rotation +# def get_observation( +# self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# #TODO remove +# if self.type == 'destination': +# return self.dummy_answer +# mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) #TODO this is called multiple times? +# depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) + +# if task.num_steps_taken() == 0: +# self.pointnav_history_aggr = [] +# self.real_prev_location = None +# self.belief_prev_location = None + + +# fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) +# arm_world_coord = None + +# if mask.sum() != 0: + +# midpoint_agent_coord = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) +# if not torch.any(torch.isnan(midpoint_agent_coord) + torch.isinf(midpoint_agent_coord)): +# self.pointnav_history_aggr.append((midpoint_agent_coord.cpu(), 1, task.num_steps_taken())) + +# arm_mask = self.arm_mask_sensor.get_observation(env, task, *args, **kwargs) #TODO this is also called twice +# if arm_mask.sum() == 0: #Do we want to do some approximations or no? 
+# arm_world_coord = None #TODO approax for this +# else: +# arm_world_coord = get_mid_point_of_object_from_depth_and_mask(arm_mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) +# # distance_in_agent_coord = midpoint_agent_coord - arm_location_in_camera +# # result = distance_in_agent_coord.cpu() +# # if arm_mask.sum() != 0 or mask.sum() != 0: +# # +# # import cv2 +# # cv2.imwrite('/Users/kianae/Desktop/image.png', env.kinect_frame[:,:,::-1]) +# # cv2.imwrite('/Users/kianae/Desktop/mask.png', mask.squeeze().numpy() * 255) +# # cv2.imwrite('/Users/kianae/Desktop/arm_mask.png', arm_mask * 255) +# result = self.history_aggregation(camera_xyz, camera_rotation, arm_world_coord, task.num_steps_taken()) +# return result +# def history_aggregation(self, camera_xyz, camera_rotation, arm_world_coord, current_step_number): +# if len(self.pointnav_history_aggr) == 0 or arm_world_coord is None: +# return self.dummy_answer +# else: +# weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_weights = sum(weights) +# total_sum = [mid * (1. / (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_sum = sum(total_sum) +# midpoint = total_sum / total_weights +# agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) +# midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) +# midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) +# midpoint_agent_coord = torch.Tensor([midpoint_agent_coord['position'][k] for k in ['x','y','z']]) + +# arm_world_coord = dict(position=dict(x=arm_world_coord[0], y=arm_world_coord[1], z=arm_world_coord[2]), rotation=dict(x=0,y=0,z=0)) +# arm_state_agent_coord = convert_world_to_agent_coordinate(arm_world_coord, agent_state) +# arm_state_agent_coord = torch.Tensor([arm_state_agent_coord['position'][k] for k in ['x','y','z']]) + +# distance_in_agent_coord = midpoint_agent_coord - arm_state_agent_coord + +# return distance_in_agent_coord.cpu() + + +# class KinectArmMaskSensor(Sensor): +# def __init__(self, uuid: str = "arm_mask_kinect", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# super().__init__(**prepare_locals_for_super(locals())) + +# def get_observation( +# self, env: ManipulaTHOREnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: +# kinect_raw_value = env.kinect_raw_frame +# mask = get_binary_mask_of_arm(kinect_raw_value) +# mask = normalize_real_kinect_image(mask) +# return np.expand_dims(mask, axis=-1) + +# class RealIntelAgentBodyPointNavEmulSensor(Sensor): + +# def __init__(self, type: str, mask_sensor:Sensor, depth_sensor:Sensor, uuid: str = "point_nav_emul", **kwargs: Any): +# observation_space = gym.spaces.Box( +# low=0, high=1, shape=(1,), dtype=np.float32 +# ) # (low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) +# self.type = type +# self.mask_sensor = mask_sensor +# self.depth_sensor = depth_sensor +# uuid = '{}_{}'.format(uuid, type) + +# self.min_xyz = np.zeros((3)) +# self.dummy_answer = torch.zeros(3) +# self.dummy_answer[:] = 4 # is this good enough? 
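The history_aggregation routines in these commented-out point-nav sensors average all stored object midpoints, weighting each by the reciprocal of its age in steps, before converting the result into the agent's coordinate frame. A small numeric sketch of just the weighting (the step numbers below are invented for the example, not taken from the code):

import torch

observations = [                      # (midpoint, num_pixels, step_recorded)
    (torch.tensor([1.0, 0.0, 2.0]), 1, 3),
    (torch.tensor([1.2, 0.0, 2.2]), 1, 7),
]
current_step = 9
weights = [1.0 / (current_step + 1 - step) for _, _, step in observations]
weighted_sum = sum(mid * w for (mid, _, _), w in zip(observations, weights))
midpoint = weighted_sum / sum(weights)   # more recent observations dominate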
+# self.device = torch.device("cpu") + + +# super().__init__(**prepare_locals_for_super(locals())) +# def get_camera_int_ext(self,env): +# #TODO all these values need to be checked +# fov=max(INTEL_FOV_W, INTEL_FOV_H) #TODO are you sure? it should be smaller one I think +# agent_state = env.controller.last_event.metadata['agent'] +# camera_horizon = 0 +# camera_xyz = np.array([agent_state['position'][k] for k in ['x','y','z']]) +# camera_rotation = agent_state['rotation']['y'] +# return fov, camera_horizon, camera_xyz, camera_rotation + + + + +# def get_observation( +# self, env: IThorEnvironment, task: Task, *args: Any, **kwargs: Any +# ) -> Any: + +# mask = (self.mask_sensor.get_observation(env, task, *args, **kwargs)) + +# if task.num_steps_taken() == 0: +# self.pointnav_history_aggr = [] +# self.real_prev_location = None +# self.belief_prev_location = None + + +# fov, camera_horizon, camera_xyz, camera_rotation = self.get_camera_int_ext(env) + + +# if mask.sum() != 0: +# depth_frame_original = self.depth_sensor.get_observation(env, task, *args, **kwargs).squeeze(-1) +# middle_of_object = get_mid_point_of_object_from_depth_and_mask(mask, depth_frame_original, self.min_xyz, camera_xyz, camera_rotation, camera_horizon, fov, self.device) +# self.pointnav_history_aggr.append((middle_of_object.cpu(), 1, task.num_steps_taken())) + +# # result = middle_of_object.cpu() +# # else: +# # result = self.dummy_answer +# result = self.history_aggregation(camera_xyz, camera_rotation, task.num_steps_taken()) +# return result +# def history_aggregation(self, camera_xyz, camera_rotation, current_step_number): +# if len(self.pointnav_history_aggr) == 0: +# return self.dummy_answer +# else: +# weights = [1. / (current_step_number + 1 - num_steps) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_weights = sum(weights) +# total_sum = [mid * (1. 
/ (current_step_number + 1 - num_steps)) for mid,num_pixels,num_steps in self.pointnav_history_aggr] +# total_sum = sum(total_sum) +# midpoint = total_sum / total_weights +# agent_state = dict(position=dict(x=camera_xyz[0], y=camera_xyz[1], z=camera_xyz[2], ), rotation=dict(x=0, y=camera_rotation, z=0)) +# midpoint_position_rotation = dict(position=dict(x=midpoint[0], y=midpoint[1], z=midpoint[2]), rotation=dict(x=0,y=0,z=0)) +# midpoint_agent_coord = convert_world_to_agent_coordinate(midpoint_position_rotation, agent_state) + +# distance_in_agent_coord = dict(x=midpoint_agent_coord['position']['x'], y=midpoint_agent_coord['position']['y'], z=midpoint_agent_coord['position']['z']) + +# agent_centric_middle_of_object = torch.Tensor([distance_in_agent_coord['x'], distance_in_agent_coord['y'], distance_in_agent_coord['z']]) + +# agent_centric_middle_of_object = agent_centric_middle_of_object +# return agent_centric_middle_of_object diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index 51e3394..450a411 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py @@ -217,14 +217,14 @@ def check_controller_version(self, controller=None): controller_to_check = controller if controller_to_check is None: controller_to_check = self.controller - if STRETCH_MANIPULATHOR_COMMIT_ID is not None: - assert ( - self.env_args['commit_id'] in controller_to_check._build.url - ), "Build number is not right, {} vs {}, use pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format( - controller_to_check._build.url, - STRETCH_MANIPULATHOR_COMMIT_ID, - STRETCH_MANIPULATHOR_COMMIT_ID, - ) + # if STRETCH_MANIPULATHOR_COMMIT_ID is not None: + # assert ( + # self.env_args['commit_id'] in controller_to_check._build.url + # ), "Build number is not right, {} vs {}, use pip3 install -e git+https://github.com/allenai/ai2thor.git@{}#egg=ai2thor".format( + # controller_to_check._build.url, + # STRETCH_MANIPULATHOR_COMMIT_ID, + # STRETCH_MANIPULATHOR_COMMIT_ID, + # ) def create_controller(self): assert 'commit_id' in self.env_args diff --git a/utils/stretch_utils/stretch_visualizer.py b/utils/stretch_utils/stretch_visualizer.py index a56f23a..6141c4a 100644 --- a/utils/stretch_utils/stretch_visualizer.py +++ b/utils/stretch_utils/stretch_visualizer.py @@ -194,7 +194,7 @@ def finish_episode(self, environment, episode_info, task_info): gif_name = ( f"{time_to_write}_room_{room_name}_to_{source_obj_type}_episode_{episode_success_offset}.gif" ) - self.log_queue = put_action_on_image(self.log_queue, self.action_queue[1:]) + # self.log_queue = put_action_on_image(self.log_queue, self.action_queue[1:]) addition_texts = ['xxx'] + [str(x) for x in episode_info.agent_body_dist_to_obj] if not addition_texts == ['xxx']: self.log_queue = put_additional_text_on_image(self.log_queue, addition_texts) From 1e020b8d40e59b5a284a6c8a40e4a79333a5be18 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 1 Jun 2022 17:05:44 -0700 Subject: [PATCH 29/53] support for default and locobot agents on objectnav, addition of single-camera ithor-style robothor testing to compare with procthor-style robothor testing --- ...nav_for_procthor_clip_resnet50_rgb_only.py | 8 +- .../obj_nav_2camera_robothor_narrow.py | 0 .../real_robothor_objectnav.py | 0 .../robothor/test_robothor_ithorstyle.py | 74 ++++++++ .../test_robothor_procthorstyle.py | 12 +- .../procthor_object_nav_task_samplers.py | 58 ++++-- 
utils/stretch_utils/stretch_constants.py | 1 + .../stretch_ithor_arm_environment.py | 45 +++-- .../stretch_ithor_arm_environment_minimal.py | 178 ++++++++++++++++++ .../stretch_utils/stretch_object_nav_tasks.py | 2 +- utils/stretch_utils/stretch_visualizer.py | 2 +- 11 files changed, 343 insertions(+), 37 deletions(-) rename manipulathor_baselines/stretch_object_nav_baselines/experiments/{ithor => robothor}/obj_nav_2camera_robothor_narrow.py (100%) rename manipulathor_baselines/stretch_object_nav_baselines/experiments/{real_robothor => robothor}/real_robothor_objectnav.py (100%) create mode 100644 manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py rename manipulathor_baselines/stretch_object_nav_baselines/experiments/{real_robothor => robothor}/test_robothor_procthorstyle.py (84%) create mode 100644 utils/stretch_utils/stretch_ithor_arm_environment_minimal.py diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index a3f44f5..0b8890c 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -14,6 +14,7 @@ from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment +from utils.stretch_utils.stretch_ithor_arm_environment_minimal import MinimalStretchManipulaTHOREnvironment from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler @@ -54,7 +55,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( MAX_STEPS = 500 if platform.system() == "Darwin": - MAX_STEPS = 100 + MAX_STEPS = 200 NUM_TRAIN_HOUSES = 100 SENSORS += [ RGBSensorThor( @@ -67,7 +68,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( ), ] - NUM_PROCESSES = 40 + NUM_PROCESSES = 32 TASK_SAMPLER = ProcTHORObjectNavTaskSampler TASK_TYPE = StretchNeckedObjectNavTask @@ -82,12 +83,13 @@ def __init__(self): super().__init__() self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) + self.ENV_ARGS['agentMode']='locobot' self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['renderInstanceSegmentation'] = False self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['allow_flipping'] = False + self.ENV_ARGS['allow_flipping'] = True self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py similarity index 100% rename from manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_robothor_narrow.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py 
b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py similarity index 100% rename from manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/real_robothor_objectnav.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py new file mode 100644 index 0000000..3066a0c --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_ithorstyle.py @@ -0,0 +1,74 @@ +import platform +import random +import yaml + +from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor +from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from utils.stretch_utils.stretch_object_nav_tasks import StretchNeckedObjectNavTask + + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.ithor.obj_nav_2camera_ithor_wide import \ + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + +from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL + + + +class RoboTHORObjectNavTestiTHORstyleSingleCam( + ithorObjectNavClipResnet50RGBOnly2CameraWideFOV +): + """An Object Navigation experiment configuration in iThor with RGB + input.""" + + TRAIN_SCENES = ROBOTHOR_TRAIN + TEST_SCENES = ROBOTHOR_VAL + # OBJECT_TYPES = list(set([v for room_typ, obj_list in FULL_LIST_OF_OBJECTS.items() for v in obj_list if room_typ == 'robothor'])) + # OBJECT_TYPES.sort() + + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: + OBJECT_TYPES=yaml.safe_load(f) + + random.shuffle(TRAIN_SCENES) + random.shuffle(TEST_SCENES) + + SENSORS = [ + RGBSensorThor( + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres", + ), + GoalObjectTypeThorSensor( + object_types=OBJECT_TYPES, + ), + ] + + if platform.system() == "Darwin": + SENSORS += [ + RGBSensorThor( + height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + use_resnet_normalization=True, + mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + uuid="rgb_lowres_only_viz", + ), + ] + + TASK_TYPE = StretchNeckedObjectNavTask + MAX_STEPS = 200 + + def __init__(self): + super().__init__() + self.ENV_ARGS['agentMode']='default' + # maybe - include updated procthor commit ID? 
check if it makes a difference + + + + + + diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py similarity index 84% rename from manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/test_robothor_procthorstyle.py rename to manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 3b93d5e..8e2b0c2 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/real_robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -3,20 +3,26 @@ import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only import \ + ProcTHORObjectNavClipResnet50RGBOnly from utils.procthor_utils.procthor_object_nav_task_samplers import RoboThorObjectNavTestTaskSampler -from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask + +from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment + from manipulathor_utils.debugger_util import ForkedPdb -class ObjectNavRoboTHORConfig(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +class ObjectNavTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) TASK_SAMPLER = RoboThorObjectNavTestTaskSampler - TASK_TYPE = StretchObjectNavTask + TASK_TYPE = StretchNeckedObjectNavTask + ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment TEST_ON_VALIDATION = True @classmethod diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index a57a277..e763eb8 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -160,7 +160,10 @@ def get_nearest_positions(self, world_position: Vector3) -> List[Vector3]: ] def get_nearest_agent_height(self, y_coordinate: float) -> float: - return 1.5759992 # from default agent - is guess. TODO check stretch + if self.env_args['agentMode'] == 'locobot': + return 0.8697997 + else: + return 1.5759992 # from default agent - is guess. TODO check stretch @property def house_index(self) -> int: @@ -300,20 +303,39 @@ def increment_scene(self) -> bool: self.env.controller.step( action="CreateHouse", house=self.house, raise_for_failure=True ) - self.env.controller.step("ResetObjectFilter") - - #TODO dude this is ugly! 
- pose = self.house["metadata"]["agent"].copy() - event = self.env.controller.step(action="TeleportFull", **pose) - if not event: - get_logger().warning(f"Initial teleport failing in {self.house_index}.") # clear logger noise - return False #TODO this can mess FPS - self.env.controller.step(action="MakeAllObjectsMoveable") - self.env.controller.step(action="MakeObjectsStaticKinematicMassThreshold") - make_all_objects_unbreakable(self.env.controller) + # self.env.controller.step("ResetObjectFilter") + + # #TODO dude this is ugly! + # pose = self.house["metadata"]["agent"].copy() + # event = self.env.controller.step(action="TeleportFull", **pose) + # if not event: + # get_logger().warning(f"Initial teleport failing in {self.house_index}.") # clear logger noise + # return False #TODO this can mess FPS + # self.env.controller.step(action="MakeAllObjectsMoveable") + # self.env.controller.step(action="MakeObjectsStaticKinematicMassThreshold") + # make_all_objects_unbreakable(self.env.controller) # NOTE: Set reachable positions + # if self.house_index not in self.reachable_positions_map: + # rp_event = self.env.controller.step(action="GetReachablePositions") + # if not rp_event: + # # NOTE: Skip scenes where GetReachablePositions fails + # get_logger().warning( + # f"GetReachablePositions failed in {self.house_index}" + # ) + # return False + # reachable_positions = rp_event.metadata["actionReturn"] + # self.reachable_positions_map[self.house_index] = reachable_positions + if self.house_index not in self.reachable_positions_map: + pose = self.house["metadata"]["agent"].copy() + # ForkedPdb().set_trace() + if self.env_args['agentMode'] == 'locobot': + del pose["standing"] + event = self.env.controller.step(action="TeleportFull", **pose) + if not event: + get_logger().warning(f"Initial teleport failing in {self.house_index}.") + return False rp_event = self.env.controller.step(action="GetReachablePositions") if not rp_event: # NOTE: Skip scenes where GetReachablePositions fails @@ -323,8 +345,8 @@ def increment_scene(self) -> bool: return False reachable_positions = rp_event.metadata["actionReturn"] self.reachable_positions_map[self.house_index] = reachable_positions - return True + def increment_scene_index(self): self.house_inds_index = (self.house_inds_index + 1) % len(self.house_inds) @@ -376,8 +398,11 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject position=random.choice(self.reachable_positions), rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), horizon=0, - standing=True, + # standing=standing, ) + # ForkedPdb().set_trace() + if self.env_args['agentMode'] != 'locobot': + starting_pose['standing']=True event = self.env.controller.step(action="TeleportFull", **starting_pose) if attempts > 10: get_logger().error(f"Teleport failed {attempts-1} times in house {self.house_index} - something may be wrong") @@ -447,7 +472,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject if self.last_house_idx is None or self.last_house_idx != ep["scene"]: self.last_scene = ep["scene"] - self.env.reset(ep["scene"]) + self.env.controller.reset(ep["scene"]) # NOTE: not using ep["targetObjectIds"] due to floating points with # target objects moving. 
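Both task samplers in this patch repeat the same agent-pose handling before calling TeleportFull: zero the horizon and include the standing flag only for agents that support it. A compact sketch of that pattern as a helper (this helper does not exist in the repo; it only summarizes the inline branching, assuming the locobot agent rejects a standing argument):

def build_teleport_pose(agent_pose: dict, agent_mode: str) -> dict:
    # Copy so the cached episode/house pose is not mutated in place.
    pose = dict(agent_pose)
    pose["horizon"] = 0
    if agent_mode == "locobot":
        pose.pop("standing", None)   # locobot has no standing/crouching state
    else:
        pose["standing"] = True
    return pose

A sampler would then call controller.step(action="TeleportFull", **build_teleport_pose(ep["agentPose"], env_args["agentMode"])) instead of branching on the agent mode at each call site.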
@@ -463,7 +488,8 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject raise_for_failure=True, ) ep["agentPose"]["horizon"] = 0 # reset for stretch agent - ep["agentPose"]["standing"] = True + if self.env_args['agentMode'] != 'locobot': + ep["agentPose"]["standing"] = True event = self.env.controller.step(action="TeleportFull", **ep["agentPose"]) if not event: # NOTE: Skip scenes where TeleportFull fails. diff --git a/utils/stretch_utils/stretch_constants.py b/utils/stretch_utils/stretch_constants.py index 5cdbc06..88ad531 100644 --- a/utils/stretch_utils/stretch_constants.py +++ b/utils/stretch_utils/stretch_constants.py @@ -16,6 +16,7 @@ STRETCH_MANIPULATHOR_COMMIT_ID = '09b6ccf74558395a231927dee8be3b8c83b52ef7' #bigger fov # PROCTHOR_COMMIT_ID = '0eddca46783a788bfce69b146c496d931c981ae4' PROCTHOR_COMMIT_ID = '996a369b5484c7037d3737906be81b84a52473a0' #after the arm destroy bug +UPDATED_PROCTHOR_COMMIT_ID = 'fd95db6135273a8ab2e35d4d350dd556c8ced655' STRETCH_ENV_ARGS = dict( gridSize=0.25, diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index 450a411..21af8a6 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py @@ -121,6 +121,8 @@ def __init__( self.MEMORY_SIZE = 5 # self.memory_frames = [] + self.list_of_actions_so_far = [] + if "quality" not in self.env_args: self.env_args["quality"] = self._quality @@ -185,19 +187,23 @@ def reset( scene_name = self.controller.last_event.metadata["sceneName"] # self.reset_init_params()#**kwargs) removing this fixes one of the crashing problem - # to solve the crash issue - # why do we still have this crashing problem? - try: - reset_environment_and_additional_commands(self.controller, scene_name) - except Exception as e: - print("RESETTING THE SCENE,", scene_name, 'because of', str(e)) - self.controller = self.create_controller() - reset_environment_and_additional_commands(self.controller, scene_name) - - if self.object_open_speed != 1.0: - self.controller.step( - {"action": "ChangeOpenSpeed", "x": self.object_open_speed} - ) + if self.env_args['agentMode'] is not 'stretch': + self.controller.reset(scene_name) + else: + + # to solve the crash issue + # why do we still have this crashing problem? 
+ try: + reset_environment_and_additional_commands(self.controller, scene_name) + except Exception as e: + print("RESETTING THE SCENE,", scene_name, 'because of', str(e)) + self.controller = self.create_controller() + reset_environment_and_additional_commands(self.controller, scene_name) + + if self.object_open_speed != 1.0: + self.controller.step( + {"action": "ChangeOpenSpeed", "x": self.object_open_speed} + ) self._initially_reachable_points = None self._initially_reachable_points_set = None @@ -300,6 +306,19 @@ def step( action = typing.cast(str, action_dict["action"]) + if self.env_args['agentMode']=='locobot' or self.env_args['agentMode']=='default': + sr = self.controller.step(action_dict) + self.list_of_actions_so_far.append(action_dict) + + if self._verbose: + print(self.controller.last_event) + + if self.restrict_to_initially_reachable_points: + self._snap_agent_to_initially_reachable() + + return sr + + skip_render = "renderImage" in action_dict and not action_dict["renderImage"] last_frame: Optional[np.ndarray] = None if skip_render: diff --git a/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py b/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py new file mode 100644 index 0000000..687685f --- /dev/null +++ b/utils/stretch_utils/stretch_ithor_arm_environment_minimal.py @@ -0,0 +1,178 @@ +import typing +from typing import Dict, Union, Optional +import numpy as np + +import ai2thor.server +from ai2thor.controller import Controller +from allenact.utils.system import get_logger + +from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment +from allenact_plugins.ithor_plugin.ithor_environment import IThorEnvironment + +from utils.stretch_utils.stretch_constants import ( + DONE, MOVE_AHEAD, ROTATE_RIGHT, ROTATE_LEFT, MOVE_BACK, +) +from scripts.stretch_jupyter_helper import AGENT_MOVEMENT_CONSTANT, AGENT_ROTATION_DEG, remove_nan_inf_for_frames +from utils.stretch_utils.stretch_sim2real_utils import kinect_reshape, intel_reshape + + +class MinimalStretchManipulaTHOREnvironment(ManipulaTHOREnvironment): + + @property + def kinect_frame(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + frame = self.controller.last_event.third_party_camera_frames[0].copy() + frame = remove_nan_inf_for_frames(frame, 'kinect_frame') + return kinect_reshape(frame) + @property + def kinect_depth(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + if self.controller.last_event.third_party_depth_frames[0]: + return None + depth_frame = self.controller.last_event.third_party_depth_frames[0].copy() + depth_frame = remove_nan_inf_for_frames(depth_frame, 'depth_kinect') + + # #TODO remove + if np.sum(depth_frame != self.controller.last_event.third_party_depth_frames[0].copy()) > 10: + raise Exception('Depth is nan again even after removing nan?') + + return kinect_reshape(depth_frame) + + @property + def intel_frame(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + frame = self.controller.last_event.frame.copy() + frame = remove_nan_inf_for_frames(frame, 'intel_frame') + return intel_reshape(frame) + @property + def intel_depth(self) -> np.ndarray: + """Returns rgb image corresponding to the agent's egocentric view.""" + if self.controller.last_event.depth_frame is None: + return None + depth_frame = self.controller.last_event.depth_frame.copy() + depth_frame = remove_nan_inf_for_frames(depth_frame, 'depth_intel') + return intel_reshape(depth_frame) + + 
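Each of the camera properties defined above follows the same recipe: copy the raw frame, scrub NaN/Inf pixels, then reshape/crop to the camera's usable field of view. A standalone sketch of the scrubbing step (an illustrative stand-in, not the repo's remove_nan_inf_for_frames helper):

import numpy as np

def scrub_frame(frame: np.ndarray, fill_value: float = 0.0) -> np.ndarray:
    # Replace NaN/Inf pixels so downstream sensors never see invalid values.
    frame = frame.copy()
    frame[~np.isfinite(frame)] = fill_value
    return frame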
def step( + self, action_dict: Dict[str, Union[str, int, float]] + ) -> ai2thor.server.Event: + """Take a step in the ai2thor environment.""" + + action = typing.cast(str, action_dict["action"]) + + skip_render = "renderImage" in action_dict and not action_dict["renderImage"] + last_frame: Optional[np.ndarray] = None + if skip_render: + last_frame = self.current_frame + + if self.simplify_physics: + action_dict["simplifyOPhysics"] = True + if action in [DONE]: + action_dict = { + 'action': 'Pass' + } # we have to change the last action success if the pik up fails, we do that in the task now + + elif action in [MOVE_AHEAD, MOVE_BACK, ROTATE_LEFT, ROTATE_RIGHT]: + if action == MOVE_AHEAD: + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = AGENT_MOVEMENT_CONSTANT + elif action == MOVE_BACK: + action_dict["action"] = "MoveAgent" + action_dict["ahead"] = -AGENT_MOVEMENT_CONSTANT + elif action == ROTATE_RIGHT: + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = AGENT_ROTATION_DEG + + elif action == ROTATE_LEFT: + action_dict["action"] = "RotateAgent" + action_dict["degrees"] = -AGENT_ROTATION_DEG + + + sr = self.controller.step(action_dict) + self.list_of_actions_so_far.append(action_dict) + + if self._verbose: + print(self.controller.last_event) + + if self.restrict_to_initially_reachable_points: + self._snap_agent_to_initially_reachable() + + if skip_render: + assert last_frame is not None + self.last_event.frame = last_frame + + return sr + + def create_controller(self): + assert 'commit_id' in self.env_args, 'No commit id is specified' + controller = Controller(**self.env_args) + return controller + + def start( + self, + scene_name: Optional[str], + move_mag: float = 0.25, + **kwargs, + ) -> None: + """Starts the ai2thor controller if it was previously stopped. + + After starting, `reset` will be called with the scene name and move magnitude. + + # Parameters + + scene_name : The scene to load. + move_mag : The amount of distance the agent moves in a single `MoveAhead` step. + kwargs : additional kwargs, passed to reset. + """ + if self._started: + raise RuntimeError( + "Trying to start the environment but it is already started." 
+ ) + + self.controller = self.create_controller() + + if ( + self._start_player_screen_height, + self._start_player_screen_width, + ) != self.current_frame.shape[:2]: + self.controller.step( + { + "action": "ChangeResolution", + "x": self._start_player_screen_width, + "y": self._start_player_screen_height, + } + ) + + self._started = True + self.reset(scene_name=scene_name, move_mag=move_mag, **kwargs) + + def reset( + self, + scene_name: Optional[str], + move_mag: float = 0.25, + **kwargs, + ): + self._move_mag = move_mag + self._grid_size = self._move_mag + # self.memory_frames = [] + + if scene_name is None: + scene_name = self.controller.last_event.metadata["sceneName"] + + if self.object_open_speed != 1.0: + self.controller.step( + {"action": "ChangeOpenSpeed", "x": self.object_open_speed} + ) + + self._initially_reachable_points = None + self._initially_reachable_points_set = None + self.controller.step({"action": "GetReachablePositions"}) + if not self.controller.last_event.metadata["lastActionSuccess"]: + get_logger().warning( + "Error when getting reachable points: {}".format( + self.controller.last_event.metadata["errorMessage"] + ) + ) + self._initially_reachable_points = self.last_action_return + + self.list_of_actions_so_far = [] \ No newline at end of file diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 15e2796..5ea5c7c 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -405,7 +405,7 @@ class StretchObjectNavTask(ObjectNavTask): class StretchNeckedObjectNavTask(ObjectNavTask): _actions = ( MOVE_AHEAD, - MOVE_BACK, + # MOVE_BACK, ROTATE_RIGHT, ROTATE_LEFT, "LookUp", diff --git a/utils/stretch_utils/stretch_visualizer.py b/utils/stretch_utils/stretch_visualizer.py index 6141c4a..2ab6ea6 100644 --- a/utils/stretch_utils/stretch_visualizer.py +++ b/utils/stretch_utils/stretch_visualizer.py @@ -207,7 +207,7 @@ def finish_episode(self, environment, episode_info, task_info): else: scene = this_controller.last_event.metadata["sceneName"] - reset_environment_and_additional_commands(this_controller, scene) + # reset_environment_and_additional_commands(this_controller, scene) # RH: why is there a reset in the visualizer? 
self.log_queue = [] self.action_queue = [] From b3731d04f2c7f77ab4d7129707061bdfbbf6d8ce Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 3 Jun 2022 10:04:09 -0700 Subject: [PATCH 30/53] add multiserver training, update experiment tagging and easily swapped agents, verify arm stowed and increase visibility distance for stretch --- .../procthor/obj_nav_for_procthor.py | 4 +- ...nav_for_procthor_clip_resnet50_rgb_only.py | 11 ++-- .../robothor/test_robothor_procthorstyle.py | 19 +++---- .../test_robothor_procthorstyle_distrib.py | 54 +++++++++++++++++++ scripts/run_same_commands_servers.py | 15 +++++- .../procthor_object_nav_task_samplers.py | 45 ++++------------ .../stretch_utils/stretch_object_nav_tasks.py | 1 + 7 files changed, 94 insertions(+), 55 deletions(-) create mode 100644 manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index 56d2fb1..e33148f 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -27,7 +27,7 @@ from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID +from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, UPDATED_PROCTHOR_COMMIT_ID from manipulathor_utils.debugger_util import ForkedPdb @@ -134,7 +134,7 @@ def _get_sampler_args_for_scene_split( out["cap_training"] = self.CAP_TRAINING out["env_args"]["x_display"] = x_display - out["env_args"]['commit_id'] = PROCTHOR_COMMIT_ID + out["env_args"]['commit_id'] = UPDATED_PROCTHOR_COMMIT_ID#PROCTHOR_COMMIT_ID out["env_args"]['scene'] = 'Procedural' out["env_args"]["branch"] = "nanna" if allow_flipping is not None: diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 0b8890c..594ad70 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -38,7 +38,9 @@ class ProcTHORObjectNavClipResnet50RGBOnly( OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config + distance_thr = 4.0 # set larger for stretch work + WHICH_AGENT = 'stretch' # 'locobot' 'default' + SENSORS = [ RGBSensorThor( height=ProcTHORObjectNavBaseConfig.SCREEN_SIZE, @@ -83,7 +85,7 @@ def __init__(self): super().__init__() self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['agentMode']='locobot' + self.ENV_ARGS['agentMode']=self.WHICH_AGENT self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice @@ -107,6 +109,9 @@ def create_model(self, **kwargs) -> nn.Module: visualize=self.VISUALIZE ) 
+ def get_agent(self): + return self.ENV_ARGS['agentMode'] + @classmethod def tag(cls): - return cls.__name__ + return cls.TASK_TYPE.__name__ + '-RGB-SingleCam-ProcTHOR' + '-' + cls.WHICH_AGENT diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 8e2b0c2..ab10138 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -15,25 +15,19 @@ -class ObjectNavTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) - TASK_SAMPLER = RoboThorObjectNavTestTaskSampler + TEST_TASK_SAMPLER = RoboThorObjectNavTestTaskSampler TASK_TYPE = StretchNeckedObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment TEST_ON_VALIDATION = True @classmethod def tag(cls): - return super().tag() + "-RoboTHOR-Target" - - # def make_sampler_fn(self, task_sampler_args: TaskSamplerArgs, **kwargs): - # if task_sampler_args.mode == "eval": - # return ObjectNavTestTaskSampler(args=task_sampler_args) - # else: - # return ObjectNavTaskSampler(args=task_sampler_args) + return super().tag() + "-RoboTHOR-Test" @classmethod def make_sampler_fn(cls, **kwargs): @@ -48,10 +42,11 @@ def make_sampler_fn(cls, **kwargs): ] kwargs["visualizers"] = visualizers kwargs["exp_name"] = exp_name_w_time - # ForkedPdb().set_trace() - assert kwargs["sampler_mode"] == "eval" - return cls.TASK_SAMPLER(**kwargs) + if kwargs["sampler_mode"] == "train": + return cls.TASK_SAMPLER(**kwargs) + else: + return cls.TEST_TASK_SAMPLER(**kwargs) def valid_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py new file mode 100644 index 0000000..1c38824 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -0,0 +1,54 @@ +import torch + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.robothor.test_robothor_procthorstyle import \ + ObjectNavRoboTHORTestProcTHORstyle + + +class ObjectNavRoboTHORTestProcTHORstyleDistrib( + ObjectNavRoboTHORTestProcTHORstyle +): + NUM_PROCESSES = 40 # one them crashed for space? 
+ def __init__( + self, + distributed_nodes: int = 1, + ): + super().__init__() + self.distributed_nodes = distributed_nodes + self.train_gpu_ids = tuple(range(torch.cuda.device_count())) # should I do this for everyone?, should i add val + + def machine_params(self, mode="train", **kwargs): + params = super().machine_params(mode, **kwargs) + + if mode == "train": + params.devices = params.devices * self.distributed_nodes + params.nprocesses = params.nprocesses * self.distributed_nodes + params.sampler_devices = params.sampler_devices * self.distributed_nodes + + if "machine_id" in kwargs: + machine_id = kwargs["machine_id"] + assert ( + 0 <= machine_id < self.distributed_nodes + ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]" + + local_worker_ids = list( + range( + len(self.train_gpu_ids) * machine_id, + len(self.train_gpu_ids) * (machine_id + 1), + ) + ) + + params.set_local_worker_ids(local_worker_ids) + + # Confirm we're setting up train params nicely: + print( + f"devices {params.devices}" + f"\nnprocesses {params.nprocesses}" + f"\nsampler_devices {params.sampler_devices}" + f"\nlocal_worker_ids {params.local_worker_ids}" + ) + elif mode == "valid": + # Use all GPUs at their maximum capacity for training + # (you may run validation in a separate machine) + params.nprocesses = (0,) + + return params \ No newline at end of file diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 00377e9..a9b9032 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -267,10 +267,21 @@ # --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ # --seed 10 --machine_id 0 --extra_tag armpointnav_with_ProcTHOR_RGBonly_after_fix_clip ' -command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib \ +# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/procthor_baselines/experiments/ithor/obj_nav_2camera_procthor_wide_distrib \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0 --extra_tag prcthor_obj_nav ' + +# command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOVDistrib/finetune_robothor/2022-05-27_21-00-03/exp_RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOVDistrib_finetune_robothor__stage_02__steps_000160201650.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch/experiments/ithor/kiana_obj_nav_2camera_procthor_wide_distrib \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0 --extra_tag kiana_objnav_outside_room_wide -c ~/exp_ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVDistrib_prcthor_obj_nav__stage_02__steps_000100123710.pt ' + +command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0 --extra_tag prcthor_obj_nav ' + --seed 10 --machine_id 0' + # 
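The machine_params override above scales devices, process counts, and sampler devices by the number of distributed nodes and then hands each machine a contiguous block of worker ids. A tiny numeric illustration of that indexing (the GPU count and machine id below are made up for the example, not taken from any server config):

num_gpus_per_node = 8                     # assumed GPUs per machine
machine_id = 2                            # the third node in the job
local_worker_ids = list(
    range(num_gpus_per_node * machine_id,
          num_gpus_per_node * (machine_id + 1))
)
print(local_worker_ids)                   # [16, 17, 18, 19, 20, 21, 22, 23]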
command = command_aws5 diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index e763eb8..1fa9228 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -22,7 +22,7 @@ from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask from utils.stretch_utils.stretch_constants import ADITIONAL_ARM_ARGS from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from scripts.stretch_jupyter_helper import make_all_objects_unbreakable +from scripts.stretch_jupyter_helper import get_relative_stretch_current_arm_state from manipulathor_utils.debugger_util import ForkedPdb @@ -162,6 +162,8 @@ def get_nearest_positions(self, world_position: Vector3) -> List[Vector3]: def get_nearest_agent_height(self, y_coordinate: float) -> float: if self.env_args['agentMode'] == 'locobot': return 0.8697997 + elif self.env_args['agentMode'] == 'stretch': + return 1.27 # to intel camera, measured physical else: return 1.5759992 # from default agent - is guess. TODO check stretch @@ -303,33 +305,9 @@ def increment_scene(self) -> bool: self.env.controller.step( action="CreateHouse", house=self.house, raise_for_failure=True ) - # self.env.controller.step("ResetObjectFilter") - - # #TODO dude this is ugly! - # pose = self.house["metadata"]["agent"].copy() - # event = self.env.controller.step(action="TeleportFull", **pose) - # if not event: - # get_logger().warning(f"Initial teleport failing in {self.house_index}.") # clear logger noise - # return False #TODO this can mess FPS - # self.env.controller.step(action="MakeAllObjectsMoveable") - # self.env.controller.step(action="MakeObjectsStaticKinematicMassThreshold") - # make_all_objects_unbreakable(self.env.controller) - - # NOTE: Set reachable positions - # if self.house_index not in self.reachable_positions_map: - # rp_event = self.env.controller.step(action="GetReachablePositions") - # if not rp_event: - # # NOTE: Skip scenes where GetReachablePositions fails - # get_logger().warning( - # f"GetReachablePositions failed in {self.house_index}" - # ) - # return False - # reachable_positions = rp_event.metadata["actionReturn"] - # self.reachable_positions_map[self.house_index] = reachable_positions if self.house_index not in self.reachable_positions_map: pose = self.house["metadata"]["agent"].copy() - # ForkedPdb().set_trace() if self.env_args['agentMode'] == 'locobot': del pose["standing"] event = self.env.controller.step(action="TeleportFull", **pose) @@ -345,6 +323,12 @@ def increment_scene(self) -> bool: return False reachable_positions = rp_event.metadata["actionReturn"] self.reachable_positions_map[self.house_index] = reachable_positions + + # verify the stretch arm is stowed + if self.env_args['agentMode'] == 'stretch': + arm_pos = get_relative_stretch_current_arm_state(self.env.controller) + assert abs(sum(arm_pos.values())) < 0.001 + return True @@ -398,9 +382,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject position=random.choice(self.reachable_positions), rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), horizon=0, - # standing=standing, ) - # ForkedPdb().set_trace() if self.env_args['agentMode'] != 'locobot': starting_pose['standing']=True event = self.env.controller.step(action="TeleportFull", **starting_pose) @@ -446,15 +428,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) 
self.last_scene = None - # # visualize 1/10 episodes - # self.epids_to_visualize = set( - # np.linspace( - # 0, self.reset_tasks, num=min(self.reset_tasks // 10, 4), dtype=np.uint8 - # ).tolist() - # ) - # self.args.controller_args = self.args.controller_args.copy() - # self.env_args["procedural"] = False - def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]: while True: # NOTE: Stopping condition diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 5ea5c7c..642ced6 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -218,6 +218,7 @@ def _is_goal_in_range(self) -> bool: obj for obj in self.env.last_event.metadata["objects"] if obj["visible"] and obj["objectType"] == self.task_info["object_type"] + and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 ) def shaping(self) -> float: From 8d5f5deb682b275138afee00b239ec3f2392322b Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Mon, 6 Jun 2022 10:18:19 -0700 Subject: [PATCH 31/53] debugging updates, distributed eval, update server machine param def to be modeled on procthor repo instead as experiment, add exact procthor model + preprocessor and action order for weights loading --- .../procthor/obj_nav_for_procthor.py | 8 +- ...nav_for_procthor_clip_resnet50_rgb_only.py | 8 +- .../robothor/test_robothor_procthorstyle.py | 10 +- .../test_robothor_procthorstyle_distrib.py | 150 ++++++++++++++---- .../clip_resnet_ncamera_preprocess_mixin.py | 141 +++++++++++++++- scripts/run_same_commands_servers.py | 11 +- .../procthor_object_nav_task_samplers.py | 2 + .../stretch_utils/stretch_object_nav_tasks.py | 11 ++ 8 files changed, 293 insertions(+), 48 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index e33148f..e3cff44 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -109,7 +109,11 @@ def _get_sampler_args_for_scene_split( ) -> Dict[str, Any]: house_inds = list(range(len(houses))) - scenes = [str(h) for h in house_inds if h not in PROCTHOR_INVALID_SCENES] + if mode is "train": + scenes = [str(h) for h in house_inds if h not in PROCTHOR_INVALID_SCENES] + else: + scenes = [str(h) for h in house_inds] + general_args = super()._get_sampler_args_for_scene_split(scenes=scenes, # scenes=scenes, process_ind=process_ind, **kwargs) @@ -122,7 +126,7 @@ def _get_sampler_args_for_scene_split( "house_inds": list(map(int,general_args['scenes'])), "process_ind": process_ind, "target_object_types": self.OBJECT_TYPES, - "max_tasks": max_tasks if max_tasks is not None else len(house_inds), + "max_tasks": max_tasks if max_tasks is not None else len(general_args['scenes']), "distance_type": self.DISTANCE_TYPE, "resample_same_scene_freq": resample_same_scene_freq, "scene_name": "Procedural" diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 594ad70..da12c02 100644 --- 
a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -38,8 +38,8 @@ class ProcTHORObjectNavClipResnet50RGBOnly( OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 - distance_thr = 4.0 # set larger for stretch work - WHICH_AGENT = 'stretch' # 'locobot' 'default' + distance_thr = 1.0 # set larger for stretch work + WHICH_AGENT = 'locobot' # 'locobot' 'default' 'stretch' SENSORS = [ RGBSensorThor( @@ -70,7 +70,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( ), ] - NUM_PROCESSES = 32 + NUM_PROCESSES = 56 TASK_SAMPLER = ProcTHORObjectNavTaskSampler TASK_TYPE = StretchNeckedObjectNavTask @@ -92,6 +92,8 @@ def __init__(self): self.ENV_ARGS['renderInstanceSegmentation'] = False self.ENV_ARGS['renderDepthImage'] = False self.ENV_ARGS['allow_flipping'] = True + # if self.WHICH_AGENT == 'stretch': + # self.ENV_ARGS['gridSize']=0.2 self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index ab10138..a013e61 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -1,4 +1,5 @@ import datasets +import torch from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \ import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ @@ -6,7 +7,7 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only import \ ProcTHORObjectNavClipResnet50RGBOnly from utils.procthor_utils.procthor_object_nav_task_samplers import RoboThorObjectNavTestTaskSampler -from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask, StretchNeckedObjectNavTaskUpdateOrder from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment @@ -21,9 +22,10 @@ class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): ) TEST_TASK_SAMPLER = RoboThorObjectNavTestTaskSampler - TASK_TYPE = StretchNeckedObjectNavTask + TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment TEST_ON_VALIDATION = True + TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for faster testing @classmethod def tag(cls): @@ -52,7 +54,7 @@ def valid_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["validation"], mode="eval", - max_tasks=150, + max_tasks=None, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, @@ -66,7 +68,7 @@ def test_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["test"].shuffle(), mode="eval", - max_tasks=150, + max_tasks=None, 
allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py index 1c38824..6b614a0 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -1,54 +1,140 @@ +from typing import Any, Dict, List, Optional, Sequence +from typing_extensions import Literal import torch from manipulathor_baselines.stretch_object_nav_baselines.experiments.robothor.test_robothor_procthorstyle import \ ObjectNavRoboTHORTestProcTHORstyle +from allenact.base_abstractions.experiment_config import MachineParams +from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph +from allenact.utils.experiment_utils import evenly_distribute_count_into_bins +from allenact.base_abstractions.sensor import ( + ExpertActionSensor, + SensorSuite, +) class ObjectNavRoboTHORTestProcTHORstyleDistrib( ObjectNavRoboTHORTestProcTHORstyle ): - NUM_PROCESSES = 40 # one them crashed for space? + NUM_PROCESSES = 56 # one them crashed for space? + NUM_TRAIN_PROCESSES = 64 + NUM_VAL_PROCESSES = 8 + NUM_TEST_PROCESSES = 0 + + TRAIN_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + VAL_DEVICES = ( + (torch.cuda.device_count() - 1,) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) + TEST_DEVICES = ( + tuple(range(torch.cuda.device_count())) + if torch.cuda.is_available() + else (torch.device("cpu"),) + ) def __init__( self, distributed_nodes: int = 1, ): super().__init__() self.distributed_nodes = distributed_nodes - self.train_gpu_ids = tuple(range(torch.cuda.device_count())) # should I do this for everyone?, should i add val + # self.train_gpu_ids = tuple(range(torch.cuda.device_count())) # should I do this for everyone?, should i add val + + + # def machine_params(self, mode="train", **kwargs): + # params = super().machine_params(mode, **kwargs) + + # if mode == "train": + # params.devices = params.devices * self.distributed_nodes + # params.nprocesses = params.nprocesses * self.distributed_nodes + # params.sampler_devices = params.sampler_devices * self.distributed_nodes + + # if "machine_id" in kwargs: + # machine_id = kwargs["machine_id"] + # assert ( + # 0 <= machine_id < self.distributed_nodes + # ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]" - def machine_params(self, mode="train", **kwargs): - params = super().machine_params(mode, **kwargs) + # local_worker_ids = list( + # range( + # len(self.train_gpu_ids) * machine_id, + # len(self.train_gpu_ids) * (machine_id + 1), + # ) + # ) + # params.set_local_worker_ids(local_worker_ids) + + # # Confirm we're setting up train params nicely: + # print( + # f"devices {params.devices}" + # f"\nnprocesses {params.nprocesses}" + # f"\nsampler_devices {params.sampler_devices}" + # f"\nlocal_worker_ids {params.local_worker_ids}" + # ) + # elif mode == "valid": + # # Use all GPUs at their maximum capacity for training + # # (you may run validation in a separate machine) + # params.nprocesses = (0,) + + # return params + + def machine_params(self, mode: Literal["train", "valid", "test"], **kwargs): + 
devices: Sequence[torch.device] + nprocesses: int if mode == "train": - params.devices = params.devices * self.distributed_nodes - params.nprocesses = params.nprocesses * self.distributed_nodes - params.sampler_devices = params.sampler_devices * self.distributed_nodes - - if "machine_id" in kwargs: - machine_id = kwargs["machine_id"] - assert ( - 0 <= machine_id < self.distributed_nodes - ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes - 1}]" - - local_worker_ids = list( - range( - len(self.train_gpu_ids) * machine_id, - len(self.train_gpu_ids) * (machine_id + 1), - ) - ) + devices = self.TRAIN_DEVICES * self.distributed_nodes + nprocesses = self.NUM_TRAIN_PROCESSES * self.distributed_nodes + elif mode == "valid": + devices = self.VAL_DEVICES + nprocesses = self.NUM_VAL_PROCESSES + elif mode == "test": + devices = self.TEST_DEVICES + nprocesses = self.NUM_TEST_PROCESSES + + nprocesses = evenly_distribute_count_into_bins( + count=nprocesses, nbins=len(devices) + ) - params.set_local_worker_ids(local_worker_ids) + sensors = [*self.SENSORS] + if mode != "train": + sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)] - # Confirm we're setting up train params nicely: - print( - f"devices {params.devices}" - f"\nnprocesses {params.nprocesses}" - f"\nsampler_devices {params.sampler_devices}" - f"\nlocal_worker_ids {params.local_worker_ids}" + sensor_preprocessor_graph = ( + SensorPreprocessorGraph( + source_observation_spaces=SensorSuite(sensors).observation_spaces, + preprocessors=self.preprocessors(), ) - elif mode == "valid": - # Use all GPUs at their maximum capacity for training - # (you may run validation in a separate machine) - params.nprocesses = (0,) + if mode == "train" + or ( + (isinstance(nprocesses, int) and nprocesses > 0) + or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) + ) + else None + ) + + params = MachineParams( + nprocesses=nprocesses, + devices=devices, + sampler_devices=devices, + sensor_preprocessor_graph=sensor_preprocessor_graph, + ) + + # NOTE: for distributed setup + if mode == "train" and "machine_id" in kwargs: + machine_id = kwargs["machine_id"] + assert ( + 0 <= machine_id < self.distributed_nodes + ), f"machine_id {machine_id} out of range [0, {self.distributed_nodes} - 1]" + local_worker_ids = list( + range( + len(self.TRAIN_DEVICES) * machine_id, + len(self.TRAIN_DEVICES) * (machine_id + 1), + ) + ) + params.set_local_worker_ids(local_worker_ids) - return params \ No newline at end of file + return params \ No newline at end of file diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py index 14b40d7..c5177e6 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -12,10 +12,132 @@ from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor +from allenact_plugins.navigation_plugin.objectnav.models import ( + ResnetTensorNavActorCritic, +) + from manipulathor_baselines.stretch_object_nav_baselines.models.clip_objnav_ncamera_model import \ ResnetTensorNavNCameraActorCritic from manipulathor_utils.debugger_util import ForkedPdb +from typing import Any, Dict, List, Optional, cast + +import 
clip +import gym +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +# from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.utils.misc_utils import prepare_locals_for_super + + +class ClipResNetEmbedder(nn.Module): + def __init__(self, resnet, pool=True): + super().__init__() + self.model = resnet + self.pool = pool + self.eval() + + def forward(self, x): + m = self.model.visual + with torch.no_grad(): + + def stem(x): + for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: + x = m.relu(bn(conv(x))) + x = m.avgpool(x) + return x + + x = x.type(m.conv1.weight.dtype) + x = stem(x) + x = m.layer1(x) + x = m.layer2(x) + x = m.layer3(x) + x = m.layer4(x) + if self.pool: + x = F.adaptive_avg_pool2d(x, (1, 1)) + x = torch.flatten(x, 1) + return x + + +class ClipResNetPreprocessor(Preprocessor): + """Preprocess RGB or depth image using a ResNet model with CLIP model + weights.""" + + CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073) + CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711) + + def __init__( + self, + rgb_input_uuid: str, + clip_model_type: str, + pool: bool, + device: Optional[torch.device] = None, + device_ids: Optional[List[torch.device]] = None, + **kwargs: Any, + ): + assert clip_model_type in clip.available_models() + + if clip_model_type == "RN50": + output_shape = (2048, 7, 7) + elif clip_model_type == "RN50x16": + output_shape = (3072, 7, 7) + else: + raise NotImplementedError( + f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" + ) + + if pool: + output_shape = output_shape[:1] + + self.clip_model_type = clip_model_type + + self.pool = pool + + self.device = torch.device("cpu") if device is None else device + self.device_ids = device_ids or cast( + List[torch.device], list(range(torch.cuda.device_count())) + ) + self._resnet: Optional[ClipResNetEmbedder] = None + + low = -np.inf + high = np.inf + shape = output_shape + + input_uuids = [rgb_input_uuid] + assert ( + len(input_uuids) == 1 + ), "resnet preprocessor can only consume one observation type" + + observation_space = gym.spaces.Box(low=low, high=high, shape=shape) + + super().__init__(**prepare_locals_for_super(locals())) + + @property + def resnet(self) -> ClipResNetEmbedder: + import clip + + if self._resnet is None: + self._resnet = ClipResNetEmbedder( + clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool + ).to(self.device) + return self._resnet + + def to(self, device: torch.device) -> "ClipResNetPreprocessor": + self._resnet = self.resnet.to(device) + self.device = device + return self + + def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: + x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw + # If the input is depth, repeat it across all 3 channels + if x.shape[1] == 1: + x = x.repeat(1, 3, 1, 1) + x = self.resnet(x).float() + return x + + @attr.s(kw_only=True) class ClipResNetPreprocessNCameraGRUActorCriticMixin: @@ -69,13 +191,24 @@ def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module # display or assert sensor order here? possible source for sneaky failure if they're not the same # as in pretraining when loading weights. 
- return ResnetTensorNavNCameraActorCritic( + return ResnetTensorNavActorCritic( action_space=gym.spaces.Discrete(num_actions), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + rgb_resnet_preprocessor_uuid="rgb_lowres_clip_resnet", + # depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, hidden_size=512, goal_dims=32, add_prev_actions=True, - visualize=visualize - ) \ No newline at end of file + ) + + # return ResnetTensorNavNCameraActorCritic( + # action_space=gym.spaces.Discrete(num_actions), + # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + # goal_sensor_uuid=goal_sensor_uuid, + # resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + # hidden_size=512, + # goal_dims=32, + # add_prev_actions=True, + # visualize=visualize + # ) \ No newline at end of file diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index a9b9032..5f73ac3 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -277,11 +277,15 @@ # --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ # --seed 10 --machine_id 0 --extra_tag kiana_objnav_outside_room_wide -c ~/exp_ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVDistrib_prcthor_obj_nav__stage_02__steps_000100123710.pt ' -command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0' + +command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test/2022-06-06_06-58-41/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0' - + --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ' # command = command_aws5 @@ -320,6 +324,7 @@ def parse_args(): args.command = server['command'] args.command = args.command.replace('IP_ADR', ip_adr) args.command = args.command.replace('NUM_MACHINES', str(len(args.servers))) + args.command = args.command.replace('MAIN_SERVER', ip_adr) return args def main(args): diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 1fa9228..8b84f8a 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -246,6 +246,7 @@ def sample_target_object_ids(self) -> Tuple[str, List[str]]: for obj_type, count in reversed(self.obj_type_counter.most_common()): instances_of_type = 
self.target_objects_in_scene.get(obj_type, [])
+            # NOTE: object type doesn't appear in the scene.
             if not instances_of_type:
                 continue
@@ -429,6 +430,7 @@ def __init__(self, *args, **kwargs):
         self.last_scene = None
     def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObjectNavTask]:
+        # ForkedPdb().set_trace()
         while True:
             # NOTE: Stopping condition
             if self.env is None:
diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py
index 642ced6..fad4fe2 100644
--- a/utils/stretch_utils/stretch_object_nav_tasks.py
+++ b/utils/stretch_utils/stretch_object_nav_tasks.py
@@ -414,6 +414,17 @@ class StretchNeckedObjectNavTask(ObjectNavTask):
         DONE,
     )
+class StretchNeckedObjectNavTaskUpdateOrder(ObjectNavTask):
+    # for evaluating weights from Matt
+    _actions = (
+        MOVE_AHEAD,
+        ROTATE_LEFT,
+        ROTATE_RIGHT,
+        DONE,
+        "LookUp",
+        "LookDown"
+    )
+
 class ExploreWiseObjectNavTask(ObjectNavTask):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
From 9a089f1cf0a5e7ef6ea2bc6081305a06180ea196 Mon Sep 17 00:00:00 2001
From: roseh-ai2
Date: Mon, 6 Jun 2022 15:46:21 -0700
Subject: [PATCH 32/53] debugging
---
 .../procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py | 4 ++--
 .../experiments/robothor/test_robothor_procthorstyle.py | 5 ++++-
 .../robothor/test_robothor_procthorstyle_distrib.py | 4 ++--
 .../models/clip_resnet_ncamera_preprocess_mixin.py | 2 +-
 utils/procthor_utils/procthor_helper.py | 4 ++--
 utils/procthor_utils/procthor_object_nav_task_samplers.py | 2 +-
 utils/stretch_utils/stretch_object_nav_tasks.py | 2 +-
 7 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py
index da12c02..979d736 100644
--- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py
@@ -92,8 +92,8 @@ def __init__(self):
         self.ENV_ARGS['renderInstanceSegmentation'] = False
         self.ENV_ARGS['renderDepthImage'] = False
         self.ENV_ARGS['allow_flipping'] = True
-        # if self.WHICH_AGENT == 'stretch':
-        #     self.ENV_ARGS['gridSize']=0.2
+        if self.WHICH_AGENT == 'locobot':
+            self.ENV_ARGS['fieldOfView'] = 63.453048374758716
         self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin(
             sensors=self.SENSORS,
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py
index a013e61..768d0d6 100644
--- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py
@@ -25,7 +25,10 @@ class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly):
     TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder
     ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment
     TEST_ON_VALIDATION = True
-    TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for faster testing
+    TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing
+
+    # POTENTIAL_VISUALIZERS = []
+    # DISTANCE_TYPE = "geo"
     @classmethod
     def tag(cls):
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py
index 6b614a0..5f4d3e4 100644
--- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py
+++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py
@@ -18,8 +18,8 @@ class ObjectNavRoboTHORTestProcTHORstyleDistrib(
 ):
     NUM_PROCESSES = 56 # one them crashed for space?
     NUM_TRAIN_PROCESSES = 64
-    NUM_VAL_PROCESSES = 8
-    NUM_TEST_PROCESSES = 0
+    NUM_VAL_PROCESSES = 10
+    NUM_TEST_PROCESSES = 16
     TRAIN_DEVICES = (
         tuple(range(torch.cuda.device_count()))
diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py
index c5177e6..146987a 100644
--- a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py
+++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py
@@ -9,7 +9,7 @@
 from allenact.base_abstractions.sensor import Sensor
 from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor
 from allenact.utils.experiment_utils import Builder
-from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
+# from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor
 from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor
 from allenact_plugins.navigation_plugin.objectnav.models import (
diff --git a/utils/procthor_utils/procthor_helper.py b/utils/procthor_utils/procthor_helper.py
index 505c95a..af8d402 100644
--- a/utils/procthor_utils/procthor_helper.py
+++ b/utils/procthor_utils/procthor_helper.py
@@ -63,7 +63,7 @@ def distance_to_object_id(
     def path_from_point_to_object_id(
         point: Dict[str, float], object_id: str, allowed_error: float
     ) -> Optional[List[Dict[str, float]]]:
-        event = env.step(
+        event = env.controller.step(
            action="GetShortestPath",
            objectId=object_id,
            position=point,
@@ -109,7 +109,7 @@ def retry_dist(position: Dict[str, float], object_id: str) -> float:
             else:
                 break
         if d < 0:
-            get_logger().warning(
+            get_logger().debug(
                 f"In house {house_name}, could not find a path from {position} to {object_id}"
                 f" with {allowed_error} error tolerance. Returning a distance of -1."
             )
diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py
index 8b84f8a..2f86a8f 100644
--- a/utils/procthor_utils/procthor_object_nav_task_samplers.py
+++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py
@@ -462,9 +462,9 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject
             objectIds=target_object_ids,
             raise_for_failure=True,
         )
-        ep["agentPose"]["horizon"] = 0 # reset for stretch agent
         if self.env_args['agentMode'] != 'locobot':
             ep["agentPose"]["standing"] = True
+            ep["agentPose"]["horizon"] = 0 # reset for stretch agent
         event = self.env.controller.step(action="TeleportFull", **ep["agentPose"])
         if not event:
             # NOTE: Skip scenes where TeleportFull fails.
diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index fad4fe2..f4504f8 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -218,7 +218,7 @@ def _is_goal_in_range(self) -> bool: obj for obj in self.env.last_event.metadata["objects"] if obj["visible"] and obj["objectType"] == self.task_info["object_type"] - and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 + # and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 ) def shaping(self) -> float: From 3029dfde66333fca409e5e4070a291ca820c63ed Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 8 Jun 2022 15:32:12 -0700 Subject: [PATCH 33/53] working imported weights replication --- .../experiments/bring_object_thor_base.py | 6 +++--- .../experiments/procthor/obj_nav_for_procthor.py | 2 +- .../obj_nav_for_procthor_clip_resnet50_rgb_only.py | 9 ++++++++- .../robothor/test_robothor_procthorstyle.py | 5 ++++- .../test_robothor_procthorstyle_distrib.py | 4 ++-- scripts/run_same_commands_servers.py | 14 +++++++------- .../procthor_object_nav_task_samplers.py | 4 +--- 7 files changed, 26 insertions(+), 18 deletions(-) diff --git a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py index b310a12..768b7b9 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py @@ -55,9 +55,9 @@ class BringObjectThorBaseConfig(BringObjectBaseConfig, ABC): def __init__(self): super().__init__() assert ( - self.CAMERA_WIDTH == 224 - and self.CAMERA_HEIGHT == 224 - and self.VISIBILITY_DISTANCE == 1 + # self.CAMERA_WIDTH == 224 + # and self.CAMERA_HEIGHT == 224 + self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25 ) self.ENV_ARGS = {**MANIPULATHOR_ENV_ARGS, "renderDepthImage": True} diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index e3cff44..9674a44 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -57,7 +57,7 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): if platform.system() == "Darwin": RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = 1 - RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # TODO apparently this won't work with 100 (why?) + RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 100 # TODO apparently this won't work with 100 (why?) 
TEST_ON_VALIDATION = False diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 979d736..f45be24 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -84,7 +84,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( def __init__(self): super().__init__() - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) + # self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) self.ENV_ARGS['agentMode']=self.WHICH_AGENT self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr @@ -94,6 +94,13 @@ def __init__(self): self.ENV_ARGS['allow_flipping'] = True if self.WHICH_AGENT == 'locobot': self.ENV_ARGS['fieldOfView'] = 63.453048374758716 + self.ENV_ARGS['rotateStepDegrees'] = 30.0 + self.ENV_ARGS["snapToGrid"] = False + self.ENV_ARGS["quality"] = 'Ultra' + self.ENV_ARGS['width'] = 400 + self.ENV_ARGS['height'] = 300 + + self.REWARD_CONFIG['shaping_weight'] = 0.0 self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 768d0d6..ba970c2 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -27,8 +27,11 @@ class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): TEST_ON_VALIDATION = True TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing - # POTENTIAL_VISUALIZERS = [] + POTENTIAL_VISUALIZERS = [] # DISTANCE_TYPE = "geo" + CAMERA_WIDTH = 400 + CAMERA_HEIGHT = 300 + # ROTATION_DEGREES = 30 @classmethod def tag(cls): diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py index 5f4d3e4..1959dd3 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -18,8 +18,8 @@ class ObjectNavRoboTHORTestProcTHORstyleDistrib( ): NUM_PROCESSES = 56 # one them crashed for space? 
NUM_TRAIN_PROCESSES = 64 - NUM_VAL_PROCESSES = 10 - NUM_TEST_PROCESSES = 16 + NUM_VAL_PROCESSES = 8 + NUM_TEST_PROCESSES = 60 TRAIN_DEVICES = ( tuple(range(torch.cuda.device_count())) diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 5f73ac3..6d90e11 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -277,15 +277,15 @@ # --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ # --seed 10 --machine_id 0 --extra_tag kiana_objnav_outside_room_wide -c ~/exp_ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVDistrib_prcthor_obj_nav__stage_02__steps_000100123710.pt ' -# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ -# --distributed_ip_and_port IP_ADR:6060 \ -# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ -# --seed 10 --machine_id 0' - -command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test/2022-06-06_06-58-41/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ' + --seed 10 --machine_id 0' + +# command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test/2022-06-06_06-58-41/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ' # command = command_aws5 diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 2f86a8f..c62a7ad 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -98,8 +98,6 @@ def __init__( self.max_agent_positions = 6 self.p_greedy_target_object = 0.8 - self.last_house_idx = None - self.reset() @@ -445,7 +443,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject ep = self.houses[epidx] # ForkedPdb().set_trace() - if self.last_house_idx is None or self.last_house_idx != ep["scene"]: + if self.last_scene is None or self.last_scene != ep["scene"]: self.last_scene = ep["scene"] 
self.env.controller.reset(ep["scene"]) From 28017d5ff8fbc9d06a48f643dda663ffd4402f9f Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 9 Jun 2022 12:36:47 -0700 Subject: [PATCH 34/53] class inheritance refactor/separation for objectnav, procthor locobot training replication experiments, still some cleanup and refactor to do --- .../experiments/bring_object_thor_base.py | 6 +- .../experiments/obj_nav_base_config.py | 269 +++++++++++++++++- .../procthor/obj_nav_for_procthor.py | 4 +- ...nav_for_procthor_clip_resnet50_rgb_only.py | 18 +- .../robothor/test_robothor_procthorstyle.py | 10 +- .../clip_resnet_ncamera_preprocess_mixin.py | 264 ++++++++--------- scripts/run_same_commands_servers.py | 14 +- scripts/update_tensorboards.py | 2 +- .../procthor_object_nav_task_samplers.py | 2 +- 9 files changed, 422 insertions(+), 167 deletions(-) diff --git a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py index 768b7b9..b310a12 100644 --- a/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py +++ b/manipulathor_baselines/bring_object_baselines/experiments/bring_object_thor_base.py @@ -55,9 +55,9 @@ class BringObjectThorBaseConfig(BringObjectBaseConfig, ABC): def __init__(self): super().__init__() assert ( - # self.CAMERA_WIDTH == 224 - # and self.CAMERA_HEIGHT == 224 - self.VISIBILITY_DISTANCE == 1 + self.CAMERA_WIDTH == 224 + and self.CAMERA_HEIGHT == 224 + and self.VISIBILITY_DISTANCE == 1 and self.STEP_SIZE == 0.25 ) self.ENV_ARGS = {**MANIPULATHOR_ENV_ARGS, "renderDepthImage": True} diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py index 6a1bf9b..bcbf6a0 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -1,35 +1,90 @@ -from typing import Sequence +from abc import ABC +import platform +from math import ceil +from typing import Optional, Sequence, Dict, Any, List, Union +import gym +import numpy as np import torch import torch.optim as optim # from ai2thor.platform import CloudRendering # from allenact.embodiedai.sensors.vision_sensors import DepthSensor -from manipulathor_baselines.bring_object_baselines.experiments.bring_object_thor_base import BringObjectThorBaseConfig +from allenact.base_abstractions.experiment_config import ExperimentConfig +import ai2thor.fifo_server + +# from manipulathor_baselines.bring_object_baselines.experiments.bring_object_thor_base import BringObjectThorBaseConfig from allenact.utils.experiment_utils import ( Builder, PipelineStage, TrainingPipeline, TrainingSettings, + evenly_distribute_count_into_bins ) +from allenact.base_abstractions.experiment_config import MachineParams from allenact.algorithms.onpolicy_sync.losses import PPO from allenact.algorithms.onpolicy_sync.losses.ppo import PPOConfig from allenact.base_abstractions.sensor import Sensor +from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.base_abstractions.preprocessor import SensorPreprocessorGraph +from allenact.base_abstractions.sensor import SensorSuite, ExpertActionSensor from utils.stretch_utils.stretch_visualizer import StretchObjNavImageVisualizer from ithor_arm.ithor_arm_viz import TestMetricLogger from 
utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler from utils.stretch_utils.stretch_object_nav_tasks import ObjectNavTask -from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID +from utils.stretch_utils.stretch_constants import PROCTHOR_COMMIT_ID, STRETCH_ENV_ARGS, UPDATED_PROCTHOR_COMMIT_ID + +LOCOBOT_ENV_ARGS = dict( + gridSize=0.25, + width=224, + height=224, + visibilityDistance=1.0, + agentMode="locobot", + fieldOfView=63.453048374758716, + rotateStepDegrees=30.0, + snapToGrid=False, + renderInstanceSegmentation=False, + renderDepthImage=False, + commit_id=UPDATED_PROCTHOR_COMMIT_ID +) + +STRETCH_ENV_ARGS = dict( + gridSize=0.25, + width=224, + height=224, + visibilityDistance=1.0, + agentMode='stretch', + fieldOfView=69, + # agentControllerType="mid-level", + # server_class=ai2thor.fifo_server.FifoServer, + snapToGrid=False, + useMassThreshold=True, + massThreshold=10, + autoSimulation=False, + autoSyncTransforms=True, + renderInstanceSegmentation=False, + renderDepthImage=False, + commit_id=UPDATED_PROCTHOR_COMMIT_ID #### TODO not sure if this works for stetch +) -class ObjectNavBaseConfig(BringObjectThorBaseConfig): +class ObjectNavBaseConfig(ExperimentConfig, ABC): """The base config for ProcTHOR ObjectNav experiments.""" - TASK_SAMPLER = AllRoomsObjectNavTaskSampler - TASK_TYPE = ObjectNavTask + ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[int] = None + SENSORS: Optional[Sequence[Sensor]] = None + + # STEP_SIZE = 0.25 + # ROTATION_DEGREES = 30.0 + # VISIBILITY_DISTANCE = 1.0 + # STOCHASTIC = False + + VISUALIZE = False + if platform.system() == "Darwin": + VISUALIZE = True TRAIN_DEVICES = ( tuple(range(torch.cuda.device_count())) @@ -48,13 +103,26 @@ class ObjectNavBaseConfig(BringObjectThorBaseConfig): ) distributed_nodes = 1 + NUM_PROCESSES: Optional[int] = None + TRAIN_GPU_IDS = list(range(torch.cuda.device_count())) + SAMPLER_GPU_IDS = TRAIN_GPU_IDS + VALID_GPU_IDS = [] + TEST_GPU_IDS = [] + + WHICH_AGENT = None + + CAMERA_WIDTH = 224 + CAMERA_HEIGHT = 224 + SCREEN_SIZE = 224 POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] + TASK_SAMPLER = AllRoomsObjectNavTaskSampler + TASK_TYPE = ObjectNavTask DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" def __init__(self): - super().__init__() + # super().__init__() self.REWARD_CONFIG = { "step_penalty": -0.01, "goal_success_reward": 10.0, @@ -62,7 +130,138 @@ def __init__(self): "failed_stop_reward": 0.0, "shaping_weight": 1.0, "positive_only_reward": False, + } # shaping weight removed for eval in task sampler + if self.WHICH_AGENT == 'locobot': + self.ENV_ARGS = LOCOBOT_ENV_ARGS + elif self.WHICH_AGENT == 'stretch': + self.ENV_ARGS = STRETCH_ENV_ARGS + else: + raise NotImplementedError + + + @classmethod + def preprocessors(cls) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: + return tuple() + + @staticmethod + def _partition_inds(n: int, num_parts: int): + return np.round(np.linspace(0, n, num_parts + 1, endpoint=True)).astype( + np.int32 + ) + + def _get_sampler_args_for_scene_split( + self, + scenes: List[str], + process_ind: int, + total_processes: int, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + if total_processes > len(scenes): # oversample some scenes -> bias + if total_processes % len(scenes) != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." 
+ " You can avoid this by setting a number of workers divisible by the number of scenes" + ) + scenes = scenes * int(ceil(total_processes / len(scenes))) + scenes = scenes[: total_processes * (len(scenes) // total_processes)] + else: + if len(scenes) % total_processes != 0: + print( + "Warning: oversampling some of the scenes to feed all processes." + " You can avoid this by setting a number of workers divisor of the number of scenes" + ) + inds = self._partition_inds(len(scenes), total_processes) + + return { + "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], + "env_args": self.ENV_ARGS, + "max_steps": self.MAX_STEPS, + "sensors": self.SENSORS, + "action_space": gym.spaces.Discrete( + len(self.TASK_TYPE.class_action_names()) + ), + "seed": seeds[process_ind] if seeds is not None else None, + "deterministic_cudnn": deterministic_cudnn, + "rewards_config": self.REWARD_CONFIG, } + + # TODO clean these up + def train_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]] = None, + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TRAIN_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = "manual" + res["sampler_mode"] = "train" + res["cap_training"] = self.CAP_TRAINING + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None + ) + return res + + def valid_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]], + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.VALID_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = self.VALID_SAMPLES_IN_SCENE + res["sampler_mode"] = "val" + res["cap_training"] = self.CAP_TRAINING + res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * len(res["scenes"]) + res["env_args"] = {} + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None + ) + return res + + def test_task_sampler_args( + self, + process_ind: int, + total_processes: int, + devices: Optional[List[int]], + seeds: Optional[List[int]] = None, + deterministic_cudnn: bool = False, + ) -> Dict[str, Any]: + res = self._get_sampler_args_for_scene_split( + self.TEST_SCENES, + process_ind, + total_processes, + seeds=seeds, + deterministic_cudnn=deterministic_cudnn, + ) + res["scene_period"] = self.TEST_SAMPLES_IN_SCENE + res["sampler_mode"] = "test" + res["env_args"] = {} + res["cap_training"] = self.CAP_TRAINING + res["env_args"].update(self.ENV_ARGS) + res["env_args"]["x_display"] = ( + ("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None + ) + return res def training_pipeline(self, **kwargs): @@ -123,4 +322,60 @@ def training_pipeline(self, **kwargs): ), ], ) + + def machine_params(self, mode="train", **kwargs): + sampler_devices: Sequence[int] = [] + if mode == "train": + workers_per_device = 1 + gpu_ids = ( + [] + if not torch.cuda.is_available() + else self.TRAIN_GPU_IDS * workers_per_device + ) + nprocesses = ( + 1 + if not torch.cuda.is_available() + else evenly_distribute_count_into_bins(self.NUM_PROCESSES, 
len(gpu_ids)) + ) + sampler_devices = self.SAMPLER_GPU_IDS + elif mode == "valid": + nprocesses = 1 + gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS + elif mode == "test": + # nprocesses = self.NUMBER_OF_TEST_PROCESS if torch.cuda.is_available() else 1 + gpu_ids = [] if not torch.cuda.is_available() else self.TEST_GPU_IDS + nprocesses = ( + 1 + if not torch.cuda.is_available() + else evenly_distribute_count_into_bins(self.NUMBER_OF_TEST_PROCESS, len(gpu_ids)) + ) + + # print('Test Mode', gpu_ids, 'because cuda is',torch.cuda.is_available(), 'number of workers', nprocesses) + else: + raise NotImplementedError("mode must be 'train', 'valid', or 'test'.") + + sensors = [*self.SENSORS] + if mode != "train": + sensors = [s for s in sensors if not isinstance(s, ExpertActionSensor)] + + sensor_preprocessor_graph = ( + SensorPreprocessorGraph( + source_observation_spaces=SensorSuite(sensors).observation_spaces, + preprocessors=self.preprocessors(), + ) + if mode == "train" + or ( + (isinstance(nprocesses, int) and nprocesses > 0) + or (isinstance(nprocesses, Sequence) and sum(nprocesses) > 0) + ) + else None + ) + + # remove + # print('MACHINE PARAM', 'nprocesses',nprocesses,'devices',gpu_ids,'sampler_devices',sampler_devices,'mode = train', mode == "train",'gpu ids', gpu_ids,) # ignored with > 1 gpu_ids) + return MachineParams(nprocesses=nprocesses, + devices=gpu_ids, + sampler_devices=sampler_devices if mode == "train" else gpu_ids, # ignored with > 1 gpu_ids + sensor_preprocessor_graph=sensor_preprocessor_graph) + diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index 9674a44..30ec6ac 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -45,7 +45,7 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" - CAP_TRAINING = None + # CAP_TRAINING = None ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[ int @@ -135,7 +135,7 @@ def _get_sampler_args_for_scene_split( out = {**general_args,**procthor_specific} out["task_type"] = self.TASK_TYPE - out["cap_training"] = self.CAP_TRAINING + # out["cap_training"] = self.CAP_TRAINING out["env_args"]["x_display"] = x_display out["env_args"]['commit_id'] = UPDATED_PROCTHOR_COMMIT_ID#PROCTHOR_COMMIT_ID diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index f45be24..4072fea 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -85,22 +85,22 @@ def __init__(self): super().__init__() # self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['agentMode']=self.WHICH_AGENT + # self.ENV_ARGS['agentMode']=self.WHICH_AGENT self.ENV_ARGS['p_randomize_material'] = 0.8 self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['renderInstanceSegmentation'] = False 
self.ENV_ARGS['renderDepthImage'] = False self.ENV_ARGS['allow_flipping'] = True - if self.WHICH_AGENT == 'locobot': - self.ENV_ARGS['fieldOfView'] = 63.453048374758716 - self.ENV_ARGS['rotateStepDegrees'] = 30.0 - self.ENV_ARGS["snapToGrid"] = False - self.ENV_ARGS["quality"] = 'Ultra' - self.ENV_ARGS['width'] = 400 - self.ENV_ARGS['height'] = 300 + # if self.WHICH_AGENT == 'locobot': + # self.ENV_ARGS['fieldOfView'] = 63.453048374758716 + # self.ENV_ARGS['rotateStepDegrees'] = 30.0 + # self.ENV_ARGS["snapToGrid"] = False + # self.ENV_ARGS["quality"] = 'Ultra' + # self.ENV_ARGS['width'] = 400 + # self.ENV_ARGS['height'] = 300 - self.REWARD_CONFIG['shaping_weight'] = 0.0 + # self.REWARD_CONFIG['shaping_weight'] = 0.0 self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index ba970c2..734506c 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -25,12 +25,12 @@ class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment TEST_ON_VALIDATION = True - TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing + # TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing POTENTIAL_VISUALIZERS = [] # DISTANCE_TYPE = "geo" - CAMERA_WIDTH = 400 - CAMERA_HEIGHT = 300 + # CAMERA_WIDTH = 400 + # CAMERA_HEIGHT = 300 # ROTATION_DEGREES = 30 @classmethod @@ -60,7 +60,7 @@ def valid_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["validation"], mode="eval", - max_tasks=None, + max_tasks=15, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, @@ -74,7 +74,7 @@ def test_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["test"].shuffle(), mode="eval", - max_tasks=None, + max_tasks=15, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py index 146987a..8e9bd0c 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -9,7 +9,7 @@ from allenact.base_abstractions.sensor import Sensor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.utils.experiment_utils import Builder -# from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor from allenact_plugins.navigation_plugin.objectnav.models import ( @@ -20,122 +20,122 @@ ResnetTensorNavNCameraActorCritic from 
manipulathor_utils.debugger_util import ForkedPdb -from typing import Any, Dict, List, Optional, cast - -import clip -import gym -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -# from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.misc_utils import prepare_locals_for_super - - -class ClipResNetEmbedder(nn.Module): - def __init__(self, resnet, pool=True): - super().__init__() - self.model = resnet - self.pool = pool - self.eval() - - def forward(self, x): - m = self.model.visual - with torch.no_grad(): - - def stem(x): - for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: - x = m.relu(bn(conv(x))) - x = m.avgpool(x) - return x - - x = x.type(m.conv1.weight.dtype) - x = stem(x) - x = m.layer1(x) - x = m.layer2(x) - x = m.layer3(x) - x = m.layer4(x) - if self.pool: - x = F.adaptive_avg_pool2d(x, (1, 1)) - x = torch.flatten(x, 1) - return x - - -class ClipResNetPreprocessor(Preprocessor): - """Preprocess RGB or depth image using a ResNet model with CLIP model - weights.""" - - CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073) - CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711) - - def __init__( - self, - rgb_input_uuid: str, - clip_model_type: str, - pool: bool, - device: Optional[torch.device] = None, - device_ids: Optional[List[torch.device]] = None, - **kwargs: Any, - ): - assert clip_model_type in clip.available_models() - - if clip_model_type == "RN50": - output_shape = (2048, 7, 7) - elif clip_model_type == "RN50x16": - output_shape = (3072, 7, 7) - else: - raise NotImplementedError( - f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" - ) - - if pool: - output_shape = output_shape[:1] - - self.clip_model_type = clip_model_type - - self.pool = pool - - self.device = torch.device("cpu") if device is None else device - self.device_ids = device_ids or cast( - List[torch.device], list(range(torch.cuda.device_count())) - ) - self._resnet: Optional[ClipResNetEmbedder] = None - - low = -np.inf - high = np.inf - shape = output_shape - - input_uuids = [rgb_input_uuid] - assert ( - len(input_uuids) == 1 - ), "resnet preprocessor can only consume one observation type" - - observation_space = gym.spaces.Box(low=low, high=high, shape=shape) - - super().__init__(**prepare_locals_for_super(locals())) - - @property - def resnet(self) -> ClipResNetEmbedder: - import clip - - if self._resnet is None: - self._resnet = ClipResNetEmbedder( - clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool - ).to(self.device) - return self._resnet - - def to(self, device: torch.device) -> "ClipResNetPreprocessor": - self._resnet = self.resnet.to(device) - self.device = device - return self - - def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: - x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw - # If the input is depth, repeat it across all 3 channels - if x.shape[1] == 1: - x = x.repeat(1, 3, 1, 1) - x = self.resnet(x).float() - return x +# from typing import Any, Dict, List, Optional, cast + +# import clip +# import gym +# import numpy as np +# import torch +# import torch.nn as nn +# import torch.nn.functional as F +# # from allenact.base_abstractions.preprocessor import Preprocessor +# from allenact.utils.misc_utils import prepare_locals_for_super + + +# class ClipResNetEmbedder(nn.Module): +# def __init__(self, resnet, pool=True): +# super().__init__() +# self.model = resnet +# self.pool = pool +# self.eval() + +# def 
forward(self, x): +# m = self.model.visual +# with torch.no_grad(): + +# def stem(x): +# for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: +# x = m.relu(bn(conv(x))) +# x = m.avgpool(x) +# return x + +# x = x.type(m.conv1.weight.dtype) +# x = stem(x) +# x = m.layer1(x) +# x = m.layer2(x) +# x = m.layer3(x) +# x = m.layer4(x) +# if self.pool: +# x = F.adaptive_avg_pool2d(x, (1, 1)) +# x = torch.flatten(x, 1) +# return x + + +# class ClipResNetPreprocessor(Preprocessor): +# """Preprocess RGB or depth image using a ResNet model with CLIP model +# weights.""" + +# CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073) +# CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711) + +# def __init__( +# self, +# rgb_input_uuid: str, +# clip_model_type: str, +# pool: bool, +# device: Optional[torch.device] = None, +# device_ids: Optional[List[torch.device]] = None, +# **kwargs: Any, +# ): +# assert clip_model_type in clip.available_models() + +# if clip_model_type == "RN50": +# output_shape = (2048, 7, 7) +# elif clip_model_type == "RN50x16": +# output_shape = (3072, 7, 7) +# else: +# raise NotImplementedError( +# f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" +# ) + +# if pool: +# output_shape = output_shape[:1] + +# self.clip_model_type = clip_model_type + +# self.pool = pool + +# self.device = torch.device("cpu") if device is None else device +# self.device_ids = device_ids or cast( +# List[torch.device], list(range(torch.cuda.device_count())) +# ) +# self._resnet: Optional[ClipResNetEmbedder] = None + +# low = -np.inf +# high = np.inf +# shape = output_shape + +# input_uuids = [rgb_input_uuid] +# assert ( +# len(input_uuids) == 1 +# ), "resnet preprocessor can only consume one observation type" + +# observation_space = gym.spaces.Box(low=low, high=high, shape=shape) + +# super().__init__(**prepare_locals_for_super(locals())) + +# @property +# def resnet(self) -> ClipResNetEmbedder: +# import clip + +# if self._resnet is None: +# self._resnet = ClipResNetEmbedder( +# clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool +# ).to(self.device) +# return self._resnet + +# def to(self, device: torch.device) -> "ClipResNetPreprocessor": +# self._resnet = self.resnet.to(device) +# self.device = device +# return self + +# def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: +# x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw +# # If the input is depth, repeat it across all 3 channels +# if x.shape[1] == 1: +# x = x.repeat(1, 3, 1, 1) +# x = self.resnet(x).float() +# return x @@ -191,24 +191,24 @@ def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module # display or assert sensor order here? possible source for sneaky failure if they're not the same # as in pretraining when loading weights. 
- return ResnetTensorNavActorCritic( - action_space=gym.spaces.Discrete(num_actions), - observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - goal_sensor_uuid=goal_sensor_uuid, - rgb_resnet_preprocessor_uuid="rgb_lowres_clip_resnet", - # depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, - hidden_size=512, - goal_dims=32, - add_prev_actions=True, - ) - - # return ResnetTensorNavNCameraActorCritic( + # return ResnetTensorNavActorCritic( # action_space=gym.spaces.Discrete(num_actions), # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, # goal_sensor_uuid=goal_sensor_uuid, - # resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + # rgb_resnet_preprocessor_uuid="rgb_lowres_clip_resnet", + # # depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, # hidden_size=512, # goal_dims=32, # add_prev_actions=True, - # visualize=visualize - # ) \ No newline at end of file + # ) + + return ResnetTensorNavNCameraActorCritic( + action_space=gym.spaces.Discrete(num_actions), + observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + goal_sensor_uuid=goal_sensor_uuid, + resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + hidden_size=512, + goal_dims=32, + add_prev_actions=True, + visualize=visualize + ) \ No newline at end of file diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 6d90e11..6e6916b 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -277,15 +277,15 @@ # --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ # --seed 10 --machine_id 0 --extra_tag kiana_objnav_outside_room_wide -c ~/exp_ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVDistrib_prcthor_obj_nav__stage_02__steps_000100123710.pt ' -command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ ---distributed_ip_and_port IP_ADR:6060 \ - --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0' - -# command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test/2022-06-06_06-58-41/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# command_aws5 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ # --distributed_ip_and_port IP_ADR:6060 \ # --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ -# --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTask-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000020007984.pt ' +# --seed 10 --machine_id 0' + +command_aws5 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test/2022-06-08_21-59-41/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ~/; ./manipulathor/scripts/kill-zombie.sh; cd manipulathor && 
export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +--distributed_ip_and_port IP_ADR:6060 \ + --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ + --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' # command = command_aws5 diff --git a/scripts/update_tensorboards.py b/scripts/update_tensorboards.py index cf8d390..2edbddb 100644 --- a/scripts/update_tensorboards.py +++ b/scripts/update_tensorboards.py @@ -3,7 +3,7 @@ import time import psutil -servers = [('aws1', 6006), ('aws5', 6007)] +servers = [('aws1', 6006), ('aws5', 6007), ('aws6', 6008), ('aws7', 6009)] while(True): diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index c62a7ad..484d420 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -74,7 +74,7 @@ def __init__( self.visualizers = visualizers self.sampler_mode = kwargs["sampler_mode"] - self.cap_training = kwargs["cap_training"] + # self.cap_training = kwargs["cap_training"] self.episode_index = 0 self.houses = houses From e8199b03116ab1a2b43e42b5ef73c55049fbd84a Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 10 Jun 2022 16:38:04 -0700 Subject: [PATCH 35/53] more refactor, finish replication and start single-camera training variants on stretch --- .../ithor/obj_nav_2camera_ithor_wide.py | 2 + .../experiments/obj_nav_base_config.py | 65 +++++---- .../procthor/obj_nav_for_procthor.py | 11 +- ...nav_for_procthor_clip_resnet50_rgb_only.py | 17 +-- .../robothor/test_robothor_procthorstyle.py | 10 +- .../clip_resnet_ncamera_preprocess_mixin.py | 129 ------------------ .../procthor_object_nav_task_samplers.py | 11 +- .../all_rooms_object_nav_task_sampler.py | 1 - .../stretch_utils/stretch_object_nav_tasks.py | 2 +- 9 files changed, 54 insertions(+), 194 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index 91ab5a6..d3e9e3c 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -54,6 +54,8 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( ALL_SCENES = TRAIN_SCENES + TEST_SCENES + VALID_SCENES + WHICH_AGENT = 'stretch' + # OBJECT_TYPES = tuple(sorted(TRAIN_OBJECTS)) with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py index bcbf6a0..fcef8db 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -8,12 +8,10 @@ import torch import torch.optim as optim -# from ai2thor.platform import CloudRendering -# from allenact.embodiedai.sensors.vision_sensors import DepthSensor from allenact.base_abstractions.experiment_config import ExperimentConfig +from allenact.base_abstractions.task import TaskSampler 
import ai2thor.fifo_server -# from manipulathor_baselines.bring_object_baselines.experiments.bring_object_thor_base import BringObjectThorBaseConfig from allenact.utils.experiment_utils import ( Builder, PipelineStage, @@ -55,11 +53,11 @@ gridSize=0.25, width=224, height=224, - visibilityDistance=1.0, + visibilityDistance=3.0, agentMode='stretch', fieldOfView=69, - # agentControllerType="mid-level", - # server_class=ai2thor.fifo_server.FifoServer, + agentControllerType="mid-level", + server_class=ai2thor.fifo_server.FifoServer, snapToGrid=False, useMassThreshold=True, massThreshold=10, @@ -109,6 +107,13 @@ class ObjectNavBaseConfig(ExperimentConfig, ABC): VALID_GPU_IDS = [] TEST_GPU_IDS = [] + TRAIN_SCENES: str = None + VALID_SCENES: str = None + TEST_SCENES: str = None + VALID_SAMPLES_IN_SCENE = 1 + TEST_SAMPLES_IN_SCENE = 1 + OBJECT_TYPES: Optional[Sequence[str]] = None + WHICH_AGENT = None CAMERA_WIDTH = 224 @@ -120,6 +125,7 @@ class ObjectNavBaseConfig(ExperimentConfig, ABC): TASK_SAMPLER = AllRoomsObjectNavTaskSampler TASK_TYPE = ObjectNavTask DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" + MAX_STEPS = 500 def __init__(self): # super().__init__() @@ -149,6 +155,23 @@ def _partition_inds(n: int, num_parts: int): np.int32 ) + @classmethod + def make_sampler_fn(cls, **kwargs) -> TaskSampler: + from datetime import datetime + + now = datetime.now() + + exp_name_w_time = cls.__name__ + "_" + now.strftime("%m_%d_%Y_%H_%M_%S_%f") + if cls.VISUALIZE: + visualizers = [ + viz(exp_name=exp_name_w_time) for viz in cls.POTENTIAL_VISUALIZERS + ] + kwargs["visualizers"] = visualizers + kwargs["objects"] = cls.OBJECT_TYPES + kwargs["task_type"] = cls.TASK_TYPE + kwargs["exp_name"] = exp_name_w_time + return cls.TASK_SAMPLER(**kwargs) + def _get_sampler_args_for_scene_split( self, scenes: List[str], @@ -156,6 +179,7 @@ def _get_sampler_args_for_scene_split( total_processes: int, seeds: Optional[List[int]] = None, deterministic_cudnn: bool = False, + devices: Optional[List[int]] = None ) -> Dict[str, Any]: if total_processes > len(scenes): # oversample some scenes -> bias if total_processes % len(scenes) != 0: @@ -173,7 +197,7 @@ def _get_sampler_args_for_scene_split( ) inds = self._partition_inds(len(scenes), total_processes) - return { + out = { "scenes": scenes[inds[process_ind] : inds[process_ind + 1]], "env_args": self.ENV_ARGS, "max_steps": self.MAX_STEPS, @@ -185,8 +209,11 @@ def _get_sampler_args_for_scene_split( "deterministic_cudnn": deterministic_cudnn, "rewards_config": self.REWARD_CONFIG, } + + x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None) + out['env_args']["x_display"] = x_display + return out - # TODO clean these up def train_task_sampler_args( self, process_ind: int, @@ -201,15 +228,11 @@ def train_task_sampler_args( total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, + devices=devices ) res["scene_period"] = "manual" res["sampler_mode"] = "train" - res["cap_training"] = self.CAP_TRAINING - res["env_args"] = {} - res["env_args"].update(self.ENV_ARGS) - res["env_args"]["x_display"] = ( - ("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None - ) + return res def valid_task_sampler_args( @@ -226,16 +249,11 @@ def valid_task_sampler_args( total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, + devices=devices ) res["scene_period"] = self.VALID_SAMPLES_IN_SCENE res["sampler_mode"] = "val" - res["cap_training"] = self.CAP_TRAINING res["max_tasks"] = self.VALID_SAMPLES_IN_SCENE * 
len(res["scenes"]) - res["env_args"] = {} - res["env_args"].update(self.ENV_ARGS) - res["env_args"]["x_display"] = ( - ("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None - ) return res def test_task_sampler_args( @@ -252,15 +270,10 @@ def test_task_sampler_args( total_processes, seeds=seeds, deterministic_cudnn=deterministic_cudnn, + devices=devices ) res["scene_period"] = self.TEST_SAMPLES_IN_SCENE res["sampler_mode"] = "test" - res["env_args"] = {} - res["cap_training"] = self.CAP_TRAINING - res["env_args"].update(self.ENV_ARGS) - res["env_args"]["x_display"] = ( - ("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None - ) return res diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index 30ec6ac..303dcb3 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -5,10 +5,7 @@ import datasets import numpy as np -from math import ceil -import torch -from allenact.utils.system import get_logger from allenact.base_abstractions.preprocessor import ( Preprocessor, SensorPreprocessorGraph, @@ -18,7 +15,6 @@ Union, ) from allenact.base_abstractions.task import TaskSampler -from allenact.embodiedai.sensors.vision_sensors import DepthSensor from allenact.utils.experiment_utils import ( Builder, ) @@ -45,8 +41,6 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): DISTANCE_TYPE = "l2" # "geo" # Can be "geo" or "l2" - # CAP_TRAINING = None - ADVANCE_SCENE_ROLLOUT_PERIOD: Optional[ int ] = 20 # default config/main.yaml @@ -65,7 +59,6 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): "allenai/houses", use_auth_token=True, ignore_verifications=True ) - MAX_STEPS = 500 NUM_TRAIN_HOUSES = None # none means all @classmethod @@ -116,6 +109,7 @@ def _get_sampler_args_for_scene_split( general_args = super()._get_sampler_args_for_scene_split(scenes=scenes, # scenes=scenes, process_ind=process_ind, + devices=devices, **kwargs) x_display = (("0.%d" % devices[process_ind % len(devices)]) if len(devices) > 0 else None) @@ -129,18 +123,15 @@ def _get_sampler_args_for_scene_split( "max_tasks": max_tasks if max_tasks is not None else len(general_args['scenes']), "distance_type": self.DISTANCE_TYPE, "resample_same_scene_freq": resample_same_scene_freq, - "scene_name": "Procedural" } del general_args['scenes'] out = {**general_args,**procthor_specific} out["task_type"] = self.TASK_TYPE - # out["cap_training"] = self.CAP_TRAINING out["env_args"]["x_display"] = x_display out["env_args"]['commit_id'] = UPDATED_PROCTHOR_COMMIT_ID#PROCTHOR_COMMIT_ID out["env_args"]['scene'] = 'Procedural' - out["env_args"]["branch"] = "nanna" if allow_flipping is not None: out["env_args"]['allow_flipping'] = allow_flipping diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 4072fea..2324104 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -38,8 +38,7 @@ 
class ProcTHORObjectNavClipResnet50RGBOnly( OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 - distance_thr = 1.0 # set larger for stretch work - WHICH_AGENT = 'locobot' # 'locobot' 'default' 'stretch' + WHICH_AGENT = 'stretch' # 'locobot' 'default' 'stretch' SENSORS = [ RGBSensorThor( @@ -84,23 +83,11 @@ class ProcTHORObjectNavClipResnet50RGBOnly( def __init__(self): super().__init__() - # self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - # self.ENV_ARGS['agentMode']=self.WHICH_AGENT self.ENV_ARGS['p_randomize_material'] = 0.8 - self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['renderInstanceSegmentation'] = False self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['allow_flipping'] = True - # if self.WHICH_AGENT == 'locobot': - # self.ENV_ARGS['fieldOfView'] = 63.453048374758716 - # self.ENV_ARGS['rotateStepDegrees'] = 30.0 - # self.ENV_ARGS["snapToGrid"] = False - # self.ENV_ARGS["quality"] = 'Ultra' - # self.ENV_ARGS['width'] = 400 - # self.ENV_ARGS['height'] = 300 - - # self.REWARD_CONFIG['shaping_weight'] = 0.0 + self.ENV_ARGS['allow_flipping'] = False self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 734506c..f74af44 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -15,23 +15,19 @@ from manipulathor_utils.debugger_util import ForkedPdb - class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) TEST_TASK_SAMPLER = RoboThorObjectNavTestTaskSampler - TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder + TASK_TYPE = StretchObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment TEST_ON_VALIDATION = True # TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing - POTENTIAL_VISUALIZERS = [] - # DISTANCE_TYPE = "geo" - # CAMERA_WIDTH = 400 - # CAMERA_HEIGHT = 300 - # ROTATION_DEGREES = 30 + # POTENTIAL_VISUALIZERS = [] + WHICH_AGENT = 'stretch' @classmethod def tag(cls): diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py index 8e9bd0c..3aa772e 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -20,124 +20,6 @@ ResnetTensorNavNCameraActorCritic from manipulathor_utils.debugger_util import ForkedPdb -# from typing import Any, Dict, List, Optional, cast - -# import clip -# import gym -# import numpy as np -# import torch -# import torch.nn as nn -# import torch.nn.functional as F -# # from allenact.base_abstractions.preprocessor import Preprocessor -# from allenact.utils.misc_utils import prepare_locals_for_super - - -# class ClipResNetEmbedder(nn.Module): -# def __init__(self, resnet, pool=True): -# super().__init__() -# self.model = resnet -# 
self.pool = pool -# self.eval() - -# def forward(self, x): -# m = self.model.visual -# with torch.no_grad(): - -# def stem(x): -# for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: -# x = m.relu(bn(conv(x))) -# x = m.avgpool(x) -# return x - -# x = x.type(m.conv1.weight.dtype) -# x = stem(x) -# x = m.layer1(x) -# x = m.layer2(x) -# x = m.layer3(x) -# x = m.layer4(x) -# if self.pool: -# x = F.adaptive_avg_pool2d(x, (1, 1)) -# x = torch.flatten(x, 1) -# return x - - -# class ClipResNetPreprocessor(Preprocessor): -# """Preprocess RGB or depth image using a ResNet model with CLIP model -# weights.""" - -# CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073) -# CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711) - -# def __init__( -# self, -# rgb_input_uuid: str, -# clip_model_type: str, -# pool: bool, -# device: Optional[torch.device] = None, -# device_ids: Optional[List[torch.device]] = None, -# **kwargs: Any, -# ): -# assert clip_model_type in clip.available_models() - -# if clip_model_type == "RN50": -# output_shape = (2048, 7, 7) -# elif clip_model_type == "RN50x16": -# output_shape = (3072, 7, 7) -# else: -# raise NotImplementedError( -# f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" -# ) - -# if pool: -# output_shape = output_shape[:1] - -# self.clip_model_type = clip_model_type - -# self.pool = pool - -# self.device = torch.device("cpu") if device is None else device -# self.device_ids = device_ids or cast( -# List[torch.device], list(range(torch.cuda.device_count())) -# ) -# self._resnet: Optional[ClipResNetEmbedder] = None - -# low = -np.inf -# high = np.inf -# shape = output_shape - -# input_uuids = [rgb_input_uuid] -# assert ( -# len(input_uuids) == 1 -# ), "resnet preprocessor can only consume one observation type" - -# observation_space = gym.spaces.Box(low=low, high=high, shape=shape) - -# super().__init__(**prepare_locals_for_super(locals())) - -# @property -# def resnet(self) -> ClipResNetEmbedder: -# import clip - -# if self._resnet is None: -# self._resnet = ClipResNetEmbedder( -# clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool -# ).to(self.device) -# return self._resnet - -# def to(self, device: torch.device) -> "ClipResNetPreprocessor": -# self._resnet = self.resnet.to(device) -# self.device = device -# return self - -# def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: -# x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw -# # If the input is depth, repeat it across all 3 channels -# if x.shape[1] == 1: -# x = x.repeat(1, 3, 1, 1) -# x = self.resnet(x).float() -# return x - - @attr.s(kw_only=True) class ClipResNetPreprocessNCameraGRUActorCriticMixin: @@ -191,17 +73,6 @@ def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module # display or assert sensor order here? possible source for sneaky failure if they're not the same # as in pretraining when loading weights. 
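        # A minimal sketch of the ordering check the comment above suggests. It assumes
        # self.resnet_preprocessor_uuids is a plain list of uuid strings and that the uuid
        # order used at pretraining is known; the reference order below is an assumption,
        # not a value taken from this repository.
        pretraining_uuid_order = ["rgb_lowres_clip_resnet"]  # assumed reference order
        assert self.resnet_preprocessor_uuids == pretraining_uuid_order, (
            "Preprocessor uuid order differs from the order used at pretraining; "
            "loaded weights would be silently mismatched."
        )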
- # return ResnetTensorNavActorCritic( - # action_space=gym.spaces.Discrete(num_actions), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # rgb_resnet_preprocessor_uuid="rgb_lowres_clip_resnet", - # # depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # ) - return ResnetTensorNavNCameraActorCritic( action_space=gym.spaces.Discrete(num_actions), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 484d420..5d8e37c 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -74,7 +74,8 @@ def __init__( self.visualizers = visualizers self.sampler_mode = kwargs["sampler_mode"] - # self.cap_training = kwargs["cap_training"] + if self.sampler_mode is not "train": + self.rewards_config['shaping_weight'] = 0.0 self.episode_index = 0 self.houses = houses @@ -91,7 +92,7 @@ def __init__( self.reachable_positions_map: Dict[int, Vector3] = dict() self.objects_in_scene_map: Dict[str, List[str]] = dict() self.visible_objects_cache = dict() - self.max_tasks = max_tasks #if max_tasks is not None else np.Inf # stop when I tell you to stop + self.max_tasks = max_tasks self.reset_tasks = self.max_tasks self.max_vis_points = 6 @@ -210,7 +211,7 @@ def is_object_visible(self, object_id: str) -> bool: if ( event.metadata["lastActionSuccess"] and hit["objectId"] == object_id - and hit["hitDistance"] < self.env_args['visibilityDistance'] + and hit["hitDistance"] < 1.5#self.env_args['visibilityDistance'] ): self.visible_objects_cache[self.house_index][object_id] = True return True @@ -380,7 +381,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject starting_pose = AgentPose( position=random.choice(self.reachable_positions), rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), - horizon=0, + horizon=30, ) if self.env_args['agentMode'] != 'locobot': starting_pose['standing']=True @@ -462,7 +463,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject ) if self.env_args['agentMode'] != 'locobot': ep["agentPose"]["standing"] = True - ep["agentPose"]["horizon"] = 0 # reset for stretch agent + ep["agentPose"]["horizon"] = 30 # reset for stretch agent event = self.env.controller.step(action="TeleportFull", **ep["agentPose"]) if not event: # NOTE: Skip scenes where TeleportFull fails. 
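A minimal, self-contained sketch of the sampler behavior introduced above: outside of train mode the shaping term is disabled by zeroing its weight before episodes are sampled (the other reward keys shown are assumptions, not values from this repository):

    rewards_config = {"goal_success_reward": 10.0, "step_penalty": -0.01, "shaping_weight": 1.0}
    sampler_mode = "val"  # any sampler mode other than "train"
    if sampler_mode != "train":
        rewards_config["shaping_weight"] = 0.0  # evaluation episodes then report unshaped reward only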
diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index e7c3cac..096493c 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -142,7 +142,6 @@ def __init__( self.reset() self.visualizers = visualizers self.sampler_mode = kwargs["sampler_mode"] - self.cap_training = kwargs["cap_training"] possible_initial_locations = ( "datasets/apnd-dataset/valid_agent_initial_locations.json" diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index f4504f8..fad4fe2 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -218,7 +218,7 @@ def _is_goal_in_range(self) -> bool: obj for obj in self.env.last_event.metadata["objects"] if obj["visible"] and obj["objectType"] == self.task_info["object_type"] - # and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 + and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 ) def shaping(self) -> float: From 47c077b17e064fadae885687a5a3d7b8ba19b3ef Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 10 Jun 2022 17:20:11 -0700 Subject: [PATCH 36/53] start 2-camera training with fixed horizon 15, fix shaping weight --- .../experiments/procthor/obj_nav_2camera_procthor_wide.py | 5 ++++- .../experiments/robothor/test_robothor_procthorstyle.py | 3 ++- utils/procthor_utils/procthor_object_nav_task_samplers.py | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index e9eb252..17e54db 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -22,6 +22,9 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( """An Object Navigation experiment configuration in iThor with RGB input.""" + WHICH_AGENT = 'stretch' # this only works for stretch + #TODO add check for allow flipping false + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) @@ -76,6 +79,6 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( @classmethod def tag(cls): - return cls.__name__ + return cls.TASK_TYPE.__name__ + '-RGB-2Camera-ProcTHOR' + '-' + cls.WHICH_AGENT diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index f74af44..380e17f 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -15,7 +15,8 @@ from manipulathor_utils.debugger_util import ForkedPdb -class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): EVAL_TASKS = datasets.load_dataset( 
f"allenai/robothor-objectnav-eval", use_auth_token=True ) diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 5d8e37c..bf8a44c 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -74,7 +74,7 @@ def __init__( self.visualizers = visualizers self.sampler_mode = kwargs["sampler_mode"] - if self.sampler_mode is not "train": + if self.sampler_mode != "train": self.rewards_config['shaping_weight'] = 0.0 self.episode_index = 0 @@ -381,7 +381,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject starting_pose = AgentPose( position=random.choice(self.reachable_positions), rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), - horizon=30, + horizon=15, ) if self.env_args['agentMode'] != 'locobot': starting_pose['standing']=True @@ -463,7 +463,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject ) if self.env_args['agentMode'] != 'locobot': ep["agentPose"]["standing"] = True - ep["agentPose"]["horizon"] = 30 # reset for stretch agent + ep["agentPose"]["horizon"] = 15 # reset for stretch agent event = self.env.controller.step(action="TeleportFull", **ep["agentPose"]) if not event: # NOTE: Skip scenes where TeleportFull fails. From 6ba352f9473430c11b46ecd7bd535e9a3abb2208 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Mon, 13 Jun 2022 14:39:06 -0700 Subject: [PATCH 37/53] locobot training replication settings reset --- .../experiments/obj_nav_base_config.py | 2 +- .../obj_nav_for_procthor_clip_resnet50_rgb_only.py | 6 +++--- .../robothor/test_robothor_procthorstyle.py | 10 +++++----- .../robothor/test_robothor_procthorstyle_distrib.py | 2 +- .../procthor_object_nav_task_samplers.py | 5 +++-- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py index fcef8db..9c00c6b 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -78,7 +78,7 @@ class ObjectNavBaseConfig(ExperimentConfig, ABC): # STEP_SIZE = 0.25 # ROTATION_DEGREES = 30.0 # VISIBILITY_DISTANCE = 1.0 - # STOCHASTIC = False + STOCHASTIC = False VISUALIZE = False if platform.system() == "Darwin": diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 2324104..1c7a206 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -18,7 +18,7 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler -from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, 
StretchNeckedObjectNavTask, StretchNeckedObjectNavTaskUpdateOrder from utils.stretch_utils.stretch_constants import STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb @@ -38,7 +38,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 - WHICH_AGENT = 'stretch' # 'locobot' 'default' 'stretch' + WHICH_AGENT = 'locobot' # 'locobot' 'default' 'stretch' SENSORS = [ RGBSensorThor( @@ -72,7 +72,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 56 TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchNeckedObjectNavTask + TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment # NUM_TRAIN_HOUSES = 500 diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 380e17f..3480730 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -15,20 +15,20 @@ from manipulathor_utils.debugger_util import ForkedPdb -# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): -class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) TEST_TASK_SAMPLER = RoboThorObjectNavTestTaskSampler - TASK_TYPE = StretchObjectNavTask - ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment + # TASK_TYPE = StretchObjectNavTask + # ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment TEST_ON_VALIDATION = True # TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing # POTENTIAL_VISUALIZERS = [] - WHICH_AGENT = 'stretch' + # WHICH_AGENT = 'stretch' @classmethod def tag(cls): diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py index 1959dd3..b2f1da3 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -17,7 +17,7 @@ class ObjectNavRoboTHORTestProcTHORstyleDistrib( ObjectNavRoboTHORTestProcTHORstyle ): NUM_PROCESSES = 56 # one them crashed for space? 
- NUM_TRAIN_PROCESSES = 64 + NUM_TRAIN_PROCESSES = 56 NUM_VAL_PROCESSES = 8 NUM_TEST_PROCESSES = 60 diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index bf8a44c..c0254f5 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -211,7 +211,7 @@ def is_object_visible(self, object_id: str) -> bool: if ( event.metadata["lastActionSuccess"] and hit["objectId"] == object_id - and hit["hitDistance"] < 1.5#self.env_args['visibilityDistance'] + and hit["hitDistance"] < np.min([self.env_args['visibilityDistance'],1.5]) ): self.visible_objects_cache[self.house_index][object_id] = True return True @@ -381,10 +381,11 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject starting_pose = AgentPose( position=random.choice(self.reachable_positions), rotation=Vector3(x=0, y=random.choice(self.valid_rotations), z=0), - horizon=15, + horizon=0, ) if self.env_args['agentMode'] != 'locobot': starting_pose['standing']=True + starting_pose['horizon'] = 15 event = self.env.controller.step(action="TeleportFull", **starting_pose) if attempts > 10: get_logger().error(f"Teleport failed {attempts-1} times in house {self.house_index} - something may be wrong") From 9057818262315735756099351bcd41d8e7050948 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Mon, 13 Jun 2022 16:37:11 -0700 Subject: [PATCH 38/53] 2-camera zero horizon training run checkpoint --- .../experiments/obj_nav_base_config.py | 6 ++++-- .../procthor/obj_nav_2camera_procthor_wide.py | 13 ++++++++----- .../experiments/procthor/obj_nav_for_procthor.py | 2 +- .../obj_nav_for_procthor_clip_resnet50_rgb_only.py | 6 ++---- .../robothor/test_robothor_procthorstyle.py | 9 ++------- .../procthor_object_nav_task_samplers.py | 4 ++-- 6 files changed, 19 insertions(+), 21 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py index 9c00c6b..ae30a70 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -65,7 +65,8 @@ autoSyncTransforms=True, renderInstanceSegmentation=False, renderDepthImage=False, - commit_id=UPDATED_PROCTHOR_COMMIT_ID #### TODO not sure if this works for stetch + commit_id=UPDATED_PROCTHOR_COMMIT_ID, #### TODO not sure if this works for stretch + horizon_init=0 ) @@ -78,7 +79,7 @@ class ObjectNavBaseConfig(ExperimentConfig, ABC): # STEP_SIZE = 0.25 # ROTATION_DEGREES = 30.0 # VISIBILITY_DISTANCE = 1.0 - STOCHASTIC = False + # STOCHASTIC = False VISUALIZE = False if platform.system() == "Darwin": @@ -102,6 +103,7 @@ class ObjectNavBaseConfig(ExperimentConfig, ABC): distributed_nodes = 1 NUM_PROCESSES: Optional[int] = None + NUMBER_OF_TEST_PROCESS: Optional[int] = None TRAIN_GPU_IDS = list(range(torch.cuda.device_count())) SAMPLER_GPU_IDS = TRAIN_GPU_IDS VALID_GPU_IDS = [] diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index 17e54db..7bc4a9c 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ 
b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -2,8 +2,6 @@ import yaml from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor - -from utils.procthor_utils.procthor_helper import PROCTHOR_INVALID_SCENES from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchKinectBigFov from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor @@ -22,9 +20,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( """An Object Navigation experiment configuration in iThor with RGB input.""" - WHICH_AGENT = 'stretch' # this only works for stretch - #TODO add check for allow flipping false - + with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) @@ -77,6 +73,13 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( TASK_TYPE = StretchObjectNavTask + def __init__(self): + super().__init__() + assert ( + self.WHICH_AGENT == 'stretch' # this only works for stretch + and self.ENV_ARGS['allow_flipping'] == False # not with 2-camera + ) + @classmethod def tag(cls): return cls.TASK_TYPE.__name__ + '-RGB-2Camera-ProcTHOR' + '-' + cls.WHICH_AGENT diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index 303dcb3..d64a846 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -51,7 +51,7 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): if platform.system() == "Darwin": RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = 1 - RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 100 # TODO apparently this won't work with 100 (why?) 
+ RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 100 TEST_ON_VALIDATION = False diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 1c7a206..9ecd9d2 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -1,7 +1,6 @@ import platform import random from typing import Sequence, Union -from typing_extensions import final import gym import numpy as np @@ -14,7 +13,6 @@ from utils.stretch_utils.stretch_ithor_arm_environment import StretchManipulaTHOREnvironment -from utils.stretch_utils.stretch_ithor_arm_environment_minimal import MinimalStretchManipulaTHOREnvironment from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor import ProcTHORObjectNavBaseConfig from utils.procthor_utils.procthor_object_nav_task_samplers import ProcTHORObjectNavTaskSampler @@ -38,7 +36,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 - WHICH_AGENT = 'locobot' # 'locobot' 'default' 'stretch' + WHICH_AGENT = 'stretch' # 'locobot' 'default' 'stretch' SENSORS = [ RGBSensorThor( @@ -72,7 +70,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 56 TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder + TASK_TYPE = StretchObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment # NUM_TRAIN_HOUSES = 500 diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 3480730..f0ec040 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -15,21 +15,16 @@ from manipulathor_utils.debugger_util import ForkedPdb -class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): -# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) TEST_TASK_SAMPLER = RoboThorObjectNavTestTaskSampler - # TASK_TYPE = StretchObjectNavTask - # ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment TEST_ON_VALIDATION = True # TEST_GPU_IDS = list(range(torch.cuda.device_count())) # uncomment for vision server testing - # POTENTIAL_VISUALIZERS = [] - # WHICH_AGENT = 'stretch' - @classmethod def tag(cls): return super().tag() + "-RoboTHOR-Test" diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index c0254f5..7c79dcf 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -385,7 +385,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject ) if 
self.env_args['agentMode'] != 'locobot': starting_pose['standing']=True - starting_pose['horizon'] = 15 + starting_pose['horizon'] = self.env_args['horizon_init'] event = self.env.controller.step(action="TeleportFull", **starting_pose) if attempts > 10: get_logger().error(f"Teleport failed {attempts-1} times in house {self.house_index} - something may be wrong") @@ -464,7 +464,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject ) if self.env_args['agentMode'] != 'locobot': ep["agentPose"]["standing"] = True - ep["agentPose"]["horizon"] = 15 # reset for stretch agent + ep["agentPose"]["horizon"] = self.env_args['horizon_init'] # reset for stretch agent event = self.env.controller.step(action="TeleportFull", **ep["agentPose"]) if not event: # NOTE: Skip scenes where TeleportFull fails. From c6ee736488a81e6579d8cd5360cf0dba73c1525a Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 14 Jun 2022 16:20:20 -0700 Subject: [PATCH 39/53] verified functional locobot procthor training with neck - goes with vision-server6 weights --- ...nav_for_procthor_clip_resnet50_rgb_only.py | 6 +- .../robothor/test_robothor_procthorstyle.py | 4 +- .../test_robothor_procthorstyle_distrib.py | 2 +- .../clip_resnet_ncamera_preprocess_mixin.py | 139 +++++++++++++++++- .../stretch_utils/stretch_object_nav_tasks.py | 2 +- 5 files changed, 141 insertions(+), 12 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 9ecd9d2..2057a8d 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -36,7 +36,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 - WHICH_AGENT = 'stretch' # 'locobot' 'default' 'stretch' + WHICH_AGENT = 'locobot' # 'locobot' 'default' 'stretch' SENSORS = [ RGBSensorThor( @@ -70,7 +70,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 56 TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchObjectNavTask + TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment # NUM_TRAIN_HOUSES = 500 @@ -85,7 +85,7 @@ def __init__(self): self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['renderInstanceSegmentation'] = False self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['allow_flipping'] = False + self.ENV_ARGS['allow_flipping'] = True self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index f0ec040..32de45f 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -15,8 +15,8 @@ from manipulathor_utils.debugger_util import ForkedPdb -# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): -class 
ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py index b2f1da3..1959dd3 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -17,7 +17,7 @@ class ObjectNavRoboTHORTestProcTHORstyleDistrib( ObjectNavRoboTHORTestProcTHORstyle ): NUM_PROCESSES = 56 # one them crashed for space? - NUM_TRAIN_PROCESSES = 56 + NUM_TRAIN_PROCESSES = 64 NUM_VAL_PROCESSES = 8 NUM_TEST_PROCESSES = 60 diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py index 3aa772e..146987a 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -9,7 +9,7 @@ from allenact.base_abstractions.sensor import Sensor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.utils.experiment_utils import Builder -from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +# from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor from allenact_plugins.navigation_plugin.objectnav.models import ( @@ -20,6 +20,124 @@ ResnetTensorNavNCameraActorCritic from manipulathor_utils.debugger_util import ForkedPdb +from typing import Any, Dict, List, Optional, cast + +import clip +import gym +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +# from allenact.base_abstractions.preprocessor import Preprocessor +from allenact.utils.misc_utils import prepare_locals_for_super + + +class ClipResNetEmbedder(nn.Module): + def __init__(self, resnet, pool=True): + super().__init__() + self.model = resnet + self.pool = pool + self.eval() + + def forward(self, x): + m = self.model.visual + with torch.no_grad(): + + def stem(x): + for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: + x = m.relu(bn(conv(x))) + x = m.avgpool(x) + return x + + x = x.type(m.conv1.weight.dtype) + x = stem(x) + x = m.layer1(x) + x = m.layer2(x) + x = m.layer3(x) + x = m.layer4(x) + if self.pool: + x = F.adaptive_avg_pool2d(x, (1, 1)) + x = torch.flatten(x, 1) + return x + + +class ClipResNetPreprocessor(Preprocessor): + """Preprocess RGB or depth image using a ResNet model with CLIP model + weights.""" + + CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073) + CLIP_RGB_STDS = (0.26862954, 0.26130258, 0.27577711) + + def __init__( + self, + rgb_input_uuid: str, + clip_model_type: str, + pool: bool, + device: Optional[torch.device] = None, + device_ids: Optional[List[torch.device]] = None, + **kwargs: Any, + ): + assert 
clip_model_type in clip.available_models() + + if clip_model_type == "RN50": + output_shape = (2048, 7, 7) + elif clip_model_type == "RN50x16": + output_shape = (3072, 7, 7) + else: + raise NotImplementedError( + f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" + ) + + if pool: + output_shape = output_shape[:1] + + self.clip_model_type = clip_model_type + + self.pool = pool + + self.device = torch.device("cpu") if device is None else device + self.device_ids = device_ids or cast( + List[torch.device], list(range(torch.cuda.device_count())) + ) + self._resnet: Optional[ClipResNetEmbedder] = None + + low = -np.inf + high = np.inf + shape = output_shape + + input_uuids = [rgb_input_uuid] + assert ( + len(input_uuids) == 1 + ), "resnet preprocessor can only consume one observation type" + + observation_space = gym.spaces.Box(low=low, high=high, shape=shape) + + super().__init__(**prepare_locals_for_super(locals())) + + @property + def resnet(self) -> ClipResNetEmbedder: + import clip + + if self._resnet is None: + self._resnet = ClipResNetEmbedder( + clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool + ).to(self.device) + return self._resnet + + def to(self, device: torch.device) -> "ClipResNetPreprocessor": + self._resnet = self.resnet.to(device) + self.device = device + return self + + def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: + x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw + # If the input is depth, repeat it across all 3 channels + if x.shape[1] == 1: + x = x.repeat(1, 3, 1, 1) + x = self.resnet(x).float() + return x + + @attr.s(kw_only=True) class ClipResNetPreprocessNCameraGRUActorCriticMixin: @@ -73,13 +191,24 @@ def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module # display or assert sensor order here? possible source for sneaky failure if they're not the same # as in pretraining when loading weights. 
- return ResnetTensorNavNCameraActorCritic( + return ResnetTensorNavActorCritic( action_space=gym.spaces.Discrete(num_actions), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid=goal_sensor_uuid, - resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + rgb_resnet_preprocessor_uuid="rgb_lowres_clip_resnet", + # depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, hidden_size=512, goal_dims=32, add_prev_actions=True, - visualize=visualize - ) \ No newline at end of file + ) + + # return ResnetTensorNavNCameraActorCritic( + # action_space=gym.spaces.Discrete(num_actions), + # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, + # goal_sensor_uuid=goal_sensor_uuid, + # resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, + # hidden_size=512, + # goal_dims=32, + # add_prev_actions=True, + # visualize=visualize + # ) \ No newline at end of file diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index fad4fe2..f4504f8 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -218,7 +218,7 @@ def _is_goal_in_range(self) -> bool: obj for obj in self.env.last_event.metadata["objects"] if obj["visible"] and obj["objectType"] == self.task_info["object_type"] - and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 + # and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 ) def shaping(self) -> float: From 9efb3f1a71ac2cf330ce248412761dfd90018f7d Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 14 Jun 2022 17:23:24 -0700 Subject: [PATCH 40/53] stretch 2-camera functional checkpoint --- .../experiments/obj_nav_base_config.py | 2 +- ...nav_for_procthor_clip_resnet50_rgb_only.py | 4 +- .../robothor/test_robothor_procthorstyle.py | 4 +- .../clip_resnet_ncamera_preprocess_mixin.py | 139 +----------------- .../stretch_utils/stretch_object_nav_tasks.py | 2 +- 5 files changed, 11 insertions(+), 140 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py index ae30a70..6745685 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -66,7 +66,7 @@ renderInstanceSegmentation=False, renderDepthImage=False, commit_id=UPDATED_PROCTHOR_COMMIT_ID, #### TODO not sure if this works for stretch - horizon_init=0 + horizon_init=15 ) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 2057a8d..6a869ff 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -36,7 +36,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( OBJECT_TYPES=yaml.safe_load(f) NOISE_LEVEL = 0 - WHICH_AGENT = 'locobot' # 'locobot' 'default' 'stretch' + WHICH_AGENT = 'stretch' # 'locobot' 'default' 'stretch' SENSORS = [ RGBSensorThor( @@ -85,7 +85,7 @@ def __init__(self): 
self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice self.ENV_ARGS['renderInstanceSegmentation'] = False self.ENV_ARGS['renderDepthImage'] = False - self.ENV_ARGS['allow_flipping'] = True + self.ENV_ARGS['allow_flipping'] = False self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 32de45f..f0ec040 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -15,8 +15,8 @@ from manipulathor_utils.debugger_util import ForkedPdb -class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): -# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py index 146987a..3aa772e 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_resnet_ncamera_preprocess_mixin.py @@ -9,7 +9,7 @@ from allenact.base_abstractions.sensor import Sensor from allenact.embodiedai.sensors.vision_sensors import RGBSensor, DepthSensor from allenact.utils.experiment_utils import Builder -# from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor from allenact_plugins.navigation_plugin.objectnav.models import ( @@ -20,124 +20,6 @@ ResnetTensorNavNCameraActorCritic from manipulathor_utils.debugger_util import ForkedPdb -from typing import Any, Dict, List, Optional, cast - -import clip -import gym -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -# from allenact.base_abstractions.preprocessor import Preprocessor -from allenact.utils.misc_utils import prepare_locals_for_super - - -class ClipResNetEmbedder(nn.Module): - def __init__(self, resnet, pool=True): - super().__init__() - self.model = resnet - self.pool = pool - self.eval() - - def forward(self, x): - m = self.model.visual - with torch.no_grad(): - - def stem(x): - for conv, bn in [(m.conv1, m.bn1), (m.conv2, m.bn2), (m.conv3, m.bn3)]: - x = m.relu(bn(conv(x))) - x = m.avgpool(x) - return x - - x = x.type(m.conv1.weight.dtype) - x = stem(x) - x = m.layer1(x) - x = m.layer2(x) - x = m.layer3(x) - x = m.layer4(x) - if self.pool: - x = F.adaptive_avg_pool2d(x, (1, 1)) - x = torch.flatten(x, 1) - return x - - -class ClipResNetPreprocessor(Preprocessor): - """Preprocess RGB or depth image using a ResNet model with CLIP model - weights.""" - - CLIP_RGB_MEANS = (0.48145466, 0.4578275, 0.40821073) - CLIP_RGB_STDS = 
(0.26862954, 0.26130258, 0.27577711) - - def __init__( - self, - rgb_input_uuid: str, - clip_model_type: str, - pool: bool, - device: Optional[torch.device] = None, - device_ids: Optional[List[torch.device]] = None, - **kwargs: Any, - ): - assert clip_model_type in clip.available_models() - - if clip_model_type == "RN50": - output_shape = (2048, 7, 7) - elif clip_model_type == "RN50x16": - output_shape = (3072, 7, 7) - else: - raise NotImplementedError( - f"Currently `clip_model_type` must be one of 'RN50' or 'RN50x16'" - ) - - if pool: - output_shape = output_shape[:1] - - self.clip_model_type = clip_model_type - - self.pool = pool - - self.device = torch.device("cpu") if device is None else device - self.device_ids = device_ids or cast( - List[torch.device], list(range(torch.cuda.device_count())) - ) - self._resnet: Optional[ClipResNetEmbedder] = None - - low = -np.inf - high = np.inf - shape = output_shape - - input_uuids = [rgb_input_uuid] - assert ( - len(input_uuids) == 1 - ), "resnet preprocessor can only consume one observation type" - - observation_space = gym.spaces.Box(low=low, high=high, shape=shape) - - super().__init__(**prepare_locals_for_super(locals())) - - @property - def resnet(self) -> ClipResNetEmbedder: - import clip - - if self._resnet is None: - self._resnet = ClipResNetEmbedder( - clip.load(self.clip_model_type, device=self.device)[0], pool=self.pool - ).to(self.device) - return self._resnet - - def to(self, device: torch.device) -> "ClipResNetPreprocessor": - self._resnet = self.resnet.to(device) - self.device = device - return self - - def process(self, obs: Dict[str, Any], *args: Any, **kwargs: Any) -> Any: - x = obs[self.input_uuids[0]].to(self.device).permute(0, 3, 1, 2) # bhwc -> bchw - # If the input is depth, repeat it across all 3 channels - if x.shape[1] == 1: - x = x.repeat(1, 3, 1, 1) - x = self.resnet(x).float() - return x - - @attr.s(kw_only=True) class ClipResNetPreprocessNCameraGRUActorCriticMixin: @@ -191,24 +73,13 @@ def create_model(self, num_actions: int, visualize: bool, **kwargs) -> nn.Module # display or assert sensor order here? possible source for sneaky failure if they're not the same # as in pretraining when loading weights. 
- return ResnetTensorNavActorCritic( + return ResnetTensorNavNCameraActorCritic( action_space=gym.spaces.Discrete(num_actions), observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, goal_sensor_uuid=goal_sensor_uuid, - rgb_resnet_preprocessor_uuid="rgb_lowres_clip_resnet", - # depth_resnet_preprocessor_uuid="depth_clip_resnet" if has_depth else None, + resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, hidden_size=512, goal_dims=32, add_prev_actions=True, - ) - - # return ResnetTensorNavNCameraActorCritic( - # action_space=gym.spaces.Discrete(num_actions), - # observation_space=kwargs["sensor_preprocessor_graph"].observation_spaces, - # goal_sensor_uuid=goal_sensor_uuid, - # resnet_preprocessor_uuids=self.resnet_preprocessor_uuids, - # hidden_size=512, - # goal_dims=32, - # add_prev_actions=True, - # visualize=visualize - # ) \ No newline at end of file + visualize=visualize + ) \ No newline at end of file diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index f4504f8..fad4fe2 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -218,7 +218,7 @@ def _is_goal_in_range(self) -> bool: obj for obj in self.env.last_event.metadata["objects"] if obj["visible"] and obj["objectType"] == self.task_info["object_type"] - # and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 + and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 ) def shaping(self) -> float: From ef57e7e2be34300468a5d626c903b7611f85204e Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 15 Jun 2022 16:40:06 -0700 Subject: [PATCH 41/53] update ithor style testing and minor refactors --- .../ithor/obj_nav_2camera_ithor_wide.py | 23 ++++--------------- .../procthor/obj_nav_2camera_procthor_wide.py | 2 -- .../procthor/obj_nav_for_procthor.py | 4 ++-- ...nav_for_procthor_clip_resnet50_rgb_only.py | 2 +- .../robothor/test_robothor_procthorstyle.py | 7 +++--- .../all_rooms_object_nav_task_sampler.py | 8 ++++--- 6 files changed, 17 insertions(+), 29 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index d3e9e3c..e0a7d5a 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -1,13 +1,9 @@ import platform -import random from typing import Sequence, Union -from typing_extensions import final -import gym -import numpy as np from torch import nn +import torch import yaml -import copy from allenact_plugins.ithor_plugin.ithor_sensors import RGBSensorThor from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel, RGBSensorStretchKinectBigFov @@ -19,10 +15,8 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.obj_nav_base_config import ObjectNavBaseConfig from utils.stretch_utils.all_rooms_object_nav_task_sampler import AllRoomsObjectNavTaskSampler from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask -from utils.stretch_utils.stretch_constants import STRETCH_MANIPULATHOR_COMMIT_ID, STRETCH_ENV_ARGS from manipulathor_utils.debugger_util import ForkedPdb -# from 
manipulathor_baselines.procthor_baselines.models.clip_preprocessors import ClipResNetPreprocessor from manipulathor_baselines.stretch_object_nav_baselines.models.clip_resnet_ncamera_preprocess_mixin import \ ClipResNetPreprocessNCameraGRUActorCriticMixin from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor @@ -60,10 +54,6 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) - NOISE_LEVEL = 0 - distance_thr = 1.0 # match procthor config - mean = np.array([0.48145466, 0.4578275, 0.40821073]) - stdev = np.array([0.26862954, 0.26130258, 0.27577711]) SENSORS = [ RGBSensorThor( height=ObjectNavBaseConfig.SCREEN_SIZE, @@ -86,7 +76,6 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( ), ] - MAX_STEPS = 500 if platform.system() == "Darwin": MAX_STEPS = 100 SENSORS += [ #TODO FIX ORDER HERE @@ -113,18 +102,16 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment POTENTIAL_VISUALIZERS = [StretchObjNavImageVisualizer, TestMetricLogger] + TEST_GPU_IDS = list(range(torch.cuda.device_count())) + NUMBER_OF_TEST_PROCESS = 8 + NUM_PROCESSES = 40 CLIP_MODEL_TYPE = "RN50" def __init__(self): super().__init__() - - self.ENV_ARGS = copy.deepcopy(STRETCH_ENV_ARGS) - self.ENV_ARGS['visibilityDistance'] = self.distance_thr self.ENV_ARGS['environment_type'] = self.ENVIRONMENT_TYPE #TODO this is nto the best choice - self.ENV_ARGS['renderInstanceSegmentation'] = False - self.ENV_ARGS['commit_id'] = STRETCH_MANIPULATHOR_COMMIT_ID self.preprocessing_and_model = ClipResNetPreprocessNCameraGRUActorCriticMixin( sensors=self.SENSORS, @@ -145,6 +132,6 @@ def create_model(self, **kwargs) -> nn.Module: @classmethod def tag(cls): - return cls.__name__ + return cls.TASK_TYPE.__name__ + '-RGB-2Camera-iTHOR' + '-' + cls.WHICH_AGENT diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index 7bc4a9c..a110406 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -71,8 +71,6 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( ), ] - TASK_TYPE = StretchObjectNavTask - def __init__(self): super().__init__() assert ( diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py index d64a846..4c18670 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor.py @@ -51,13 +51,13 @@ class ProcTHORObjectNavBaseConfig(ObjectNavBaseConfig): if platform.system() == "Darwin": RESAMPLE_SAME_SCENE_FREQ_IN_TRAIN = 1 - RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 100 + RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE = 1 # change from 100 TEST_ON_VALIDATION = False HOUSE_DATASET = datasets.load_dataset( "allenai/houses", use_auth_token=True, ignore_verifications=True - ) + ) # this loads even for inherited classes where is unused - not necessarily a problem NUM_TRAIN_HOUSES = None # none means all diff --git 
a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py index 6a869ff..9ecd9d2 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_for_procthor_clip_resnet50_rgb_only.py @@ -70,7 +70,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly( NUM_PROCESSES = 56 TASK_SAMPLER = ProcTHORObjectNavTaskSampler - TASK_TYPE = StretchNeckedObjectNavTaskUpdateOrder + TASK_TYPE = StretchObjectNavTask ENVIRONMENT_TYPE = StretchManipulaTHOREnvironment # NUM_TRAIN_HOUSES = 500 diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index f0ec040..dd4cda9 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -16,7 +16,8 @@ # class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): -class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) @@ -52,7 +53,7 @@ def valid_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["validation"], mode="eval", - max_tasks=15, + max_tasks=None, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, @@ -66,7 +67,7 @@ def test_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["test"].shuffle(), mode="eval", - max_tasks=15, + max_tasks=None, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index 096493c..0e8c0c9 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -294,7 +294,7 @@ def get_full_object_info(env, object_type): init_object = get_full_object_info(self.env, target_obj_type) - agent_state["cameraHorizon"] = 0 # 0 for stretch, 20 for other manipulathor agent + agent_state["cameraHorizon"] = self.env_args['horizon_init'] # 0 for stretch, 20 for other manipulathor agent event = this_controller.step( dict( action="TeleportFull", @@ -397,7 +397,7 @@ def get_source_target_indices(self): "name": "agent", "position": dict(x=agent_pose['x'], y=agent_pose['y'], z=agent_pose['z']), "rotation": dict(x=0, y=agent_pose['rotation'], z=0), - "cameraHorizon": agent_pose['horizon'], + "cameraHorizon": self.env_args['horizon_init'], #agent_pose['horizon'], "isStanding": True, } @@ -414,7 +414,7 @@ def get_source_target_indices(self): "name": "agent", "position": dict(x=agent_pose['x'], y=agent_pose['y'], z=agent_pose['z']), "rotation": 
dict(x=0, y=agent_pose['rotation'], z=0), - "cameraHorizon": agent_pose['horizon'], + "cameraHorizon": self.env_args['horizon_init'], #agent_pose['horizon'], "isStanding": True, } @@ -476,6 +476,8 @@ def next_task( if self.env is None: self.env = self._create_environment() self.env.reset(scene_name='RealRobothor') + + # TODO: set horizon to horizon_init skip_object = True while skip_object: From 64d7411c48db8c0292233bded884860c9e6b2313 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 17 Jun 2022 10:51:33 -0700 Subject: [PATCH 42/53] add kinect camera segmentation success task, set success distance in task sampler, minor qol updates to real episodes --- .../obj_nav_2camera_procthor_narrow.py | 23 ++++++------ .../procthor/obj_nav_2camera_procthor_wide.py | 6 +++- .../robothor/real_robothor_objectnav.py | 2 +- .../robothor/test_robothor_procthorstyle.py | 8 ++--- .../test_robothor_procthorstyle_distrib.py | 5 ++- .../procthor_object_nav_task_samplers.py | 7 +++- .../all_rooms_object_nav_task_sampler.py | 9 ++--- utils/stretch_utils/real_stretch_sensors.py | 2 +- .../stretch_utils/stretch_object_nav_tasks.py | 36 +++++++++++++------ 9 files changed, 63 insertions(+), 35 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py index 4a92490..ed96bf4 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_narrow.py @@ -4,32 +4,33 @@ from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor -from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ - import ProcTHORObjectNavClipResnet50RGBOnly +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor class ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( - ProcTHORObjectNavClipResnet50RGBOnly + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV ): """An Object Navigation experiment configuration in iThor with RGB input.""" with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) + SENSORS = [ RGBSensorStretchIntel( - height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, - width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres", ), RGBSensorStretchKinect( - height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, - width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, @@ -43,16 +44,16 @@ class 
ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV( if platform.system() == "Darwin": SENSORS += [ RGBSensorStretchKinect( - height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, - width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, uuid="rgb_lowres_arm_only_viz", ), RGBSensorStretchIntel( - height=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, - width=ProcTHORObjectNavClipResnet50RGBOnly.SCREEN_SIZE, + height=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + width=ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, use_resnet_normalization=True, mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index a110406..a24cefc 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -7,9 +7,10 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ import ProcTHORObjectNavClipResnet50RGBOnly -from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTaskSegmentationSuccess from manipulathor_utils.debugger_util import ForkedPdb + from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor @@ -23,6 +24,8 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) + + TASK_TYPE = StretchObjectNavTaskSegmentationSuccess SENSORS = [ RGBSensorThor( @@ -73,6 +76,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( def __init__(self): super().__init__() + self.ENV_ARGS['renderInstanceSegmentation'] = True assert ( self.WHICH_AGENT == 'stretch' # this only works for stretch and self.ENV_ARGS['allow_flipping'] == False # not with 2-camera diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py index b1e9211..fc05824 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py @@ -67,7 +67,7 @@ class RealStretchObjectNav( TASK_TYPE = RealStretchObjectNavTask #RealStretchExploreWiseRewardTask ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init - NUM_PROCESSES = 20 + NUM_PROCESSES = 1 TRAIN_SCENES = ['RealRobothor'] TEST_SCENES = ['RealRobothor'] diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index dd4cda9..69fe7be 100644 --- 
a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -16,8 +16,8 @@ # class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): -# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): -class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV): EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) @@ -53,7 +53,7 @@ def valid_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["validation"], mode="eval", - max_tasks=None, + max_tasks=5, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, @@ -67,7 +67,7 @@ def test_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( houses=self.EVAL_TASKS["test"].shuffle(), mode="eval", - max_tasks=None, + max_tasks=5, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py index 1959dd3..603c9f9 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -18,7 +18,7 @@ class ObjectNavRoboTHORTestProcTHORstyleDistrib( ): NUM_PROCESSES = 56 # one them crashed for space? 
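# Illustrative sketch, not part of the config: a quick way to sanity-check how the
# process counts above divide across distributed nodes. The helper name is
# hypothetical, and this is not necessarily how allenact assigns workers.
def workers_per_node(total_processes, num_nodes):
    # Split a worker count as evenly as possible across nodes,
    # e.g. workers_per_node(60, 4) -> [15, 15, 15, 15].
    base, remainder = divmod(total_processes, num_nodes)
    return [base + (1 if i < remainder else 0) for i in range(num_nodes)]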
NUM_TRAIN_PROCESSES = 64 - NUM_VAL_PROCESSES = 8 + NUM_VAL_PROCESSES = 4 NUM_TEST_PROCESSES = 60 TRAIN_DEVICES = ( @@ -42,6 +42,9 @@ def __init__( ): super().__init__() self.distributed_nodes = distributed_nodes + # self.ENV_ARGS['continuousMode']=True + # self.ENV_ARGS["returnToStart"]= False # do either of these actually do something + # self.train_gpu_ids = tuple(range(torch.cuda.device_count())) # should I do this for everyone?, should i add val diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 7c79dcf..733fd0d 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -98,6 +98,9 @@ def __init__( self.max_vis_points = 6 self.max_agent_positions = 6 self.p_greedy_target_object = 0.8 + self.min_raycast_distance = 1.5 + + self.success_distance = 1.0 self.reset() @@ -211,7 +214,7 @@ def is_object_visible(self, object_id: str) -> bool: if ( event.metadata["lastActionSuccess"] and hit["objectId"] == object_id - and hit["hitDistance"] < np.min([self.env_args['visibilityDistance'],1.5]) + and hit["hitDistance"] < np.min([self.env_args['visibilityDistance'],self.min_raycast_distance]) ): self.visible_objects_cache[self.house_index][object_id] = True return True @@ -413,6 +416,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject "object_type": target_object_type, "starting_pose": starting_pose, "mirrored": self.env_args['allow_flipping'] and random.random() > 0.5, + 'success_distance': self.success_distance }, ) return self._last_sampled_task @@ -494,6 +498,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject "starting_pose": ep["agentPose"], "mirrored": False, "id": f"{ep['scene']}__global{epidx}__{ep['targetObjectType']}", + 'success_distance': self.success_distance, **difficulty, }, ) diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index 0e8c0c9..40e2451 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -143,6 +143,8 @@ def __init__( self.visualizers = visualizers self.sampler_mode = kwargs["sampler_mode"] + self.success_distance = 1.0 + possible_initial_locations = ( "datasets/apnd-dataset/valid_agent_initial_locations.json" ) @@ -328,7 +330,8 @@ def get_full_object_info(env, object_type): 'mode': self.env_args['agentMode'], 'house_name': scene_name, 'scene_name': scene_name, - 'mirrored': False #self.env_args['allow_flipping'] and random.random() > 0.5, # not including for non-procthor + 'mirrored': False, #self.env_args['allow_flipping'] and random.random() > 0.5, # not including for non-procthor + 'success_distance': self.success_distance } @@ -476,13 +479,11 @@ def next_task( if self.env is None: self.env = self._create_environment() self.env.reset(scene_name='RealRobothor') - - # TODO: set horizon to horizon_init skip_object = True while skip_object: target_object = random.choice(self.possible_real_objects) - print('I am now seeking a ', target_object['object_type'], '. Continue by setting skip_object=False') + print('I am now seeking a', target_object['object_type'], '. 
Accept by setting skip_object=False') ForkedPdb().set_trace() task_info = { diff --git a/utils/stretch_utils/real_stretch_sensors.py b/utils/stretch_utils/real_stretch_sensors.py index 25339f6..911dc12 100644 --- a/utils/stretch_utils/real_stretch_sensors.py +++ b/utils/stretch_utils/real_stretch_sensors.py @@ -87,7 +87,7 @@ def frame_from_env(self, env: IThorEnvironment, task: Optional[Task]) -> np.ndar return (depth) def normalize_real_intel_image(image,final_size=224): - image = cv2.rotate(image, cv2.cv2.ROTATE_90_CLOCKWISE) + image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) # if len(image.shape) == 3: # image = image[:,:,::-1] # image = cv2.flip(image,1) diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index fad4fe2..7bea608 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -29,6 +29,7 @@ from ithor_arm.ithor_arm_environment import ManipulaTHOREnvironment from manipulathor_utils.debugger_util import ForkedPdb +from datetime import datetime class ObjectNavTask(Task[ManipulaTHOREnvironment]): _actions = ( @@ -218,7 +219,7 @@ def _is_goal_in_range(self) -> bool: obj for obj in self.env.last_event.metadata["objects"] if obj["visible"] and obj["objectType"] == self.task_info["object_type"] - and self.dist_to_target_func() < 1.5 # only applies if viz distance is greater than 1.5 + and self.dist_to_target_func() < self.task_info['success_distance'] ) def shaping(self) -> float: @@ -396,13 +397,6 @@ class StretchObjectNavTask(ObjectNavTask): DONE, ) - # TODO: change this to find the object via segementation or some other way - account for FOV cropping - # def _is_goal_in_range(self) -> bool: - # candidates = [obj - # for obj in self.env.last_event.metadata["objects"] - # if obj["visible"] and obj["objectType"] == self.task_info["object_type"]] - # return any(candidates) - class StretchNeckedObjectNavTask(ObjectNavTask): _actions = ( MOVE_AHEAD, @@ -425,6 +419,16 @@ class StretchNeckedObjectNavTaskUpdateOrder(ObjectNavTask): "LookDown" ) +class StretchObjectNavTaskSegmentationSuccess(StretchObjectNavTask): + def _is_goal_in_range(self) -> bool: + all_kinect_masks = self.env.controller.last_event.third_party_instance_masks[0] + for object_id in self.task_info["target_object_ids"]: + if object_id in all_kinect_masks and self.dist_to_target_func() < self.task_info['success_distance']: + return True + + return False + + class ExploreWiseObjectNavTask(ObjectNavTask): def __init__(self, **kwargs): super().__init__(**kwargs) @@ -465,6 +469,10 @@ def judge(self) -> float: class RealStretchObjectNavTask(StretchObjectNavTask): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.start_time = datetime.now() + self.last_time = None def _step(self, action: int) -> RLStepResult: @@ -477,14 +485,20 @@ def _step(self, action: int) -> RLStepResult: if action_str == "Done": self._took_end_action = True - print('I think I found a ', self.task_info['object_type']) - # self._success = bool(int(input('Was I correct? 0 for no, 1 for yes: '))) - print('Was I correct? Set self._success in trace.') + dt_total = (datetime.now() - self.start_time).total_seconds()/60 + print('I think I found a ', self.task_info['object_type'], ' after ', str(dt_total), ' minutes.' ) + print('Was I correct? 
Set self._success in trace.') ForkedPdb().set_trace() self._last_action_str = action_str action_dict = {"action": action_str} self.env.step(action_dict) + + if self.last_time is not None: + dt = (datetime.now() - self.last_time).total_seconds() + print('FPS: ', str(1/dt)) + self.last_time = datetime.now() + self.last_action_success = self.env.last_action_success last_action_name = self._last_action_str From 4be2d5ecac6d35bab77e63146989037f37800104 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 17 Jun 2022 16:50:42 -0700 Subject: [PATCH 43/53] add easy 10 robothor objectnav tasks --- .../robothor/real_robothor_objectnav.py | 40 ++++++++++--------- .../models/clip_objnav_ncamera_model.py | 18 ++++----- .../all_rooms_object_nav_task_sampler.py | 30 +++++++++++--- 3 files changed, 56 insertions(+), 32 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py index fc05824..7ba0b75 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py @@ -42,32 +42,36 @@ class RealStretchObjectNav( ] # this should only ever be run on darwin anyway - sensors for visualization - SENSORS += [ - RealRGBSensorStretchIntel( - height=desired_screen_size, - width=desired_screen_size, - use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres_only_viz", - ), - RealRGBSensorStretchKinect( - height=desired_screen_size, - width=desired_screen_size, - use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres_arm_only_viz", - ), - ] + # SENSORS += [ + # RealRGBSensorStretchIntel( + # height=desired_screen_size, + # width=desired_screen_size, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_only_viz", + # ), + # RealRGBSensorStretchKinect( + # height=desired_screen_size, + # width=desired_screen_size, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm_only_viz", + # ), + # ] MAX_STEPS = 60 TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler #RealStretchDiverseBringObjectTaskSampler TASK_TYPE = RealStretchObjectNavTask #RealStretchExploreWiseRewardTask ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init + VISUALIZE = True NUM_PROCESSES = 1 + NUMBER_OF_TEST_PROCESS = 0 + VAL_DEVICES = [] + TEST_DEVICES = [] TRAIN_SCENES = ['RealRobothor'] TEST_SCENES = ['RealRobothor'] diff --git a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py index 178cb69..36637f8 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/models/clip_objnav_ncamera_model.py @@ -93,15 +93,15 @@ def is_blind(self) -> bool: def forward_encoder(self, observations: ObservationType) -> torch.FloatTensor: return self.goal_visual_encoder(observations) - def forward(self, **kwargs): #TODO NOW remove - if 
platform.system() == "Darwin": - hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) - return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) - - def forward(self, **kwargs): #TODO NOW remove - if self.visualize: - hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) - return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) + # def forward(self, **kwargs): #TODO NOW remove + # if platform.system() == "Darwin": + # hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) + # return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) + + # def forward(self, **kwargs): #TODO NOW remove + # if self.visualize: + # hacky_visualization(kwargs['observations'], base_directory_to_right_images=self.starting_time) + # return super(ResnetTensorNavNCameraActorCritic, self).forward(**kwargs) class ResnetNCameraTensorGoalEncoder(nn.Module): diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index 40e2451..396d869 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -463,6 +463,21 @@ def __init__(self, **kwargs) -> None: {'object_id': 'Vase|1|1|1', 'object_type':"Vase"} ] + self.preset_easyish_tasks = [ + # {'object_id': 'Apple|1|1|1', 'object_type':"Apple"}, + # {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"}, + # {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"}, + # {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"}, + # {'object_id': 'Chair|1|1|1', 'object_type':"Chair"}, + # {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"}, + # {'object_id': 'Mug|1|1|1', 'object_type':"Mug"}, + # {'object_id': 'SprayBottle|1|1|1', 'object_type':"SprayBottle"}, + # {'object_id': 'Television|1|1|1', 'object_type':"Television"}, + {'object_id': 'Vase|1|1|1', 'object_type':"Vase"} + ] + + self.real_object_index = 0 + if self.sampler_mode == "test": self.max_tasks = self.reset_tasks = 200 @@ -480,11 +495,15 @@ def next_task( self.env = self._create_environment() self.env.reset(scene_name='RealRobothor') - skip_object = True - while skip_object: - target_object = random.choice(self.possible_real_objects) - print('I am now seeking a', target_object['object_type'], '. Accept by setting skip_object=False') - ForkedPdb().set_trace() + # skip_object = True + # while skip_object: + # target_object = random.choice(self.possible_real_objects) + # print('I am now seeking a', target_object['object_type'], '. Accept by setting skip_object=False') + # ForkedPdb().set_trace() + + target_object = self.preset_easyish_tasks[self.real_object_index] + print('I am now seeking a', target_object['object_type'], '. 
Continue when ready.') + ForkedPdb().set_trace() task_info = { 'target_object_ids': [target_object['object_id']], @@ -508,5 +527,6 @@ def next_task( distance_cache=self.distance_cache, additional_visualize=False # not implemented for non-procthor ) + self.real_object_index = self.real_object_index + 1 return self._last_sampled_task \ No newline at end of file From de6dca070d34e4061adb74e5efcc3e0759003de7 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 21 Jun 2022 13:38:30 -0700 Subject: [PATCH 44/53] functional stochastic procthor training, 2 camera wide FOV --- ...bj_nav_2camera_procthor_wide_stochastic.py | 21 +++++++++++++++++++ .../robothor/test_robothor_procthorstyle.py | 8 ++++++- .../procthor_object_nav_task_samplers.py | 4 +++- .../stretch_ithor_arm_environment.py | 20 ++++++++++-------- 4 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py new file mode 100644 index 0000000..cb240f6 --- /dev/null +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py @@ -0,0 +1,21 @@ +# import platform +# import yaml + +# from utils.stretch_utils.stretch_thor_sensors import RGBSensorStretchKinect, RGBSensorStretchIntel +# from allenact_plugins.ithor_plugin.ithor_sensors import GoalObjectTypeThorSensor + +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV +# from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor + + +class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy( + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): + + def __init__(self): + super().__init__() + self.ENV_ARGS['motion_noise_type'] = 'habitat' + self.ENV_ARGS['motion_noise_args'] = dict() + self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] + self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 \ No newline at end of file diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index 69fe7be..fa50382 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -1,11 +1,15 @@ import datasets import torch + from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \ import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide import \ ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only import \ ProcTHORObjectNavClipResnet50RGBOnly +from 
manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide_stochastic import \ + ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy + from utils.procthor_utils.procthor_object_nav_task_samplers import RoboThorObjectNavTestTaskSampler from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTask, ObjectNavTask, StretchNeckedObjectNavTask, StretchNeckedObjectNavTaskUpdateOrder @@ -16,8 +20,10 @@ # class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly): -class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): +# class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV): # class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV): +class ObjectNavRoboTHORTestProcTHORstyle(ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy): + EVAL_TASKS = datasets.load_dataset( f"allenai/robothor-objectnav-eval", use_auth_token=True ) diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 733fd0d..72beb50 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -297,7 +297,9 @@ def increment_scene(self) -> bool: self.increment_scene_index() # self.env.controller.step(action="DestroyHouse", raise_for_failure=True) - self.env.controller.reset() + # self.env.controller.reset() + self.env.reset(scene_name='Procedural') + self.env.list_of_actions_so_far = [] self.house_entry = self.houses[self.house_index] self.house = pickle.loads(self.house_entry["house"]) diff --git a/utils/stretch_utils/stretch_ithor_arm_environment.py b/utils/stretch_utils/stretch_ithor_arm_environment.py index e24f9ec..237b6e9 100644 --- a/utils/stretch_utils/stretch_ithor_arm_environment.py +++ b/utils/stretch_utils/stretch_ithor_arm_environment.py @@ -205,6 +205,8 @@ def reset( if self.env_args['agentMode'] is not 'stretch': self.controller.reset(scene_name) + elif scene_name=='Procedural': + self.controller.reset() else: # to solve the crash issue @@ -221,16 +223,16 @@ def reset( {"action": "ChangeOpenSpeed", "x": self.object_open_speed} ) - self._initially_reachable_points = None - self._initially_reachable_points_set = None - self.controller.step({"action": "GetReachablePositions"}) - if not self.controller.last_event.metadata["lastActionSuccess"]: - warnings.warn( - "Error when getting reachable points: {}".format( - self.controller.last_event.metadata["errorMessage"] + self._initially_reachable_points = None + self._initially_reachable_points_set = None + self.controller.step({"action": "GetReachablePositions"}) + if not self.controller.last_event.metadata["lastActionSuccess"]: + warnings.warn( + "Error when getting reachable points: {}".format( + self.controller.last_event.metadata["errorMessage"] + ) ) - ) - self._initially_reachable_points = self.last_action_return + self._initially_reachable_points = self.last_action_return self.list_of_actions_so_far = [] From 53e64c9a01405e09fe46afa29c338f2ade1cc03f Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 22 Jun 2022 16:50:43 -0700 Subject: [PATCH 45/53] add task that ends with penalty for repeated action failure --- .../procthor/obj_nav_2camera_procthor_wide.py | 3 +- .../stretch_utils/stretch_object_nav_tasks.py | 29 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git 
a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index a24cefc..2d8c411 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -7,7 +7,7 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ import ProcTHORObjectNavClipResnet50RGBOnly -from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTaskSegmentationSuccess +from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail from manipulathor_utils.debugger_util import ForkedPdb @@ -26,6 +26,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( OBJECT_TYPES=yaml.safe_load(f) TASK_TYPE = StretchObjectNavTaskSegmentationSuccess + # TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail SENSORS = [ RGBSensorThor( diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 7bea608..299ace1 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -7,6 +7,7 @@ import gym from importlib_metadata import Lookup import numpy as np +from sklearn.metrics import recall_score import torch from allenact.base_abstractions.misc import RLStepResult from allenact.base_abstractions.sensor import Sensor @@ -428,6 +429,34 @@ def _is_goal_in_range(self) -> bool: return False +class StretchObjectNavTaskSegmentationSuccessActionFail(StretchObjectNavTaskSegmentationSuccess): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.recent_three_strikes = 0 + self.reward_config['got_stuck_penalty'] = -1.0 # TODO this is hacky + # possible enhancement: add a "I'm stuck" action that agent is not penalized for + + def _step(self, action: int) -> RLStepResult: + sr = super()._step(action) + + if self.last_action_success or self._took_end_action: + self.recent_three_strikes = 0 + return sr + elif self.recent_three_strikes < 2: + self.recent_three_strikes += 1 + return sr + else: + # print('Task ended for repeated action failure') + # ForkedPdb().set_trace() + self._took_end_action = True + step_result = RLStepResult( + observation=sr.observation, + reward=sr.reward - self.reward_config['got_stuck_penalty'], + done=self.is_done(), + info={"last_action_success": self.last_action_success, "action": action}, + ) + return step_result + class ExploreWiseObjectNavTask(ObjectNavTask): def __init__(self, **kwargs): From 1c8894b8def04538844b5000543ded6e22026f21 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 24 Jun 2022 15:04:48 -0700 Subject: [PATCH 46/53] qol updates for real robothor eval, stochastic testing --- .../experiments/obj_nav_base_config.py | 3 +- .../procthor/obj_nav_2camera_procthor_wide.py | 3 +- ...bj_nav_2camera_procthor_wide_stochastic.py | 8 ++- .../robothor/real_robothor_objectnav.py | 1 + .../robothor/test_robothor_procthorstyle.py | 9 ++- scripts/update_tensorboards.py | 2 +- .../procthor_object_nav_task_samplers.py | 3 +- .../stretch_utils/stretch_object_nav_tasks.py | 56 ++++++++++++++++--- 8 files changed, 68 insertions(+), 17 deletions(-) diff --git 
a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py index 6745685..9c6ccfe 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/obj_nav_base_config.py @@ -104,6 +104,7 @@ class ObjectNavBaseConfig(ExperimentConfig, ABC): NUM_PROCESSES: Optional[int] = None NUMBER_OF_TEST_PROCESS: Optional[int] = None + NUMBER_OF_VALID_PROCESS: Optional[int] = 1 TRAIN_GPU_IDS = list(range(torch.cuda.device_count())) SAMPLER_GPU_IDS = TRAIN_GPU_IDS VALID_GPU_IDS = [] @@ -354,7 +355,7 @@ def machine_params(self, mode="train", **kwargs): ) sampler_devices = self.SAMPLER_GPU_IDS elif mode == "valid": - nprocesses = 1 + nprocesses = self.NUMBER_OF_VALID_PROCESS gpu_ids = [] if not torch.cuda.is_available() else self.VALID_GPU_IDS elif mode == "test": # nprocesses = self.NUMBER_OF_TEST_PROCESS if torch.cuda.is_available() else 1 diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index 2d8c411..c1ccdef 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -7,7 +7,8 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ import ProcTHORObjectNavClipResnet50RGBOnly -from utils.stretch_utils.stretch_object_nav_tasks import StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail +from utils.stretch_utils.stretch_object_nav_tasks import \ + StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail from manipulathor_utils.debugger_util import ForkedPdb diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py index cb240f6..20b6b20 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py @@ -6,7 +6,9 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_wide \ import ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV -# from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_2camera_procthor_narrow \ + import ProcTHORObjectNavClipResnet50RGBOnly2CameraNarrowFOV + # from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOVNoisy( @@ -18,4 +20,6 @@ def __init__(self): self.ENV_ARGS['motion_noise_args'] = dict() self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] - self.ENV_ARGS['motion_noise_args']['effect_scale'] = .25 \ No newline at end of file + 
self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 # .1 for eval .25 for training + + self.ENV_ARGS['returnToStart'] = False # for eval \ No newline at end of file diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py index 7ba0b75..84551f2 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py @@ -70,6 +70,7 @@ class RealStretchObjectNav( NUM_PROCESSES = 1 NUMBER_OF_TEST_PROCESS = 0 + NUMBER_OF_VALID_PROCESS = 0 VAL_DEVICES = [] TEST_DEVICES = [] diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index fa50382..cc883f5 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -68,12 +68,15 @@ def valid_task_sampler_args(self, **kwargs): def test_task_sampler_args(self, **kwargs): if self.TEST_ON_VALIDATION: - return self.valid_task_sampler_args(**kwargs) + houses = self.EVAL_TASKS["validation"] + else: + houses = self.EVAL_TASKS["test"].shuffle() + # return self.valid_task_sampler_args(**kwargs) out = self._get_sampler_args_for_scene_split( - houses=self.EVAL_TASKS["test"].shuffle(), + houses=houses, mode="eval", - max_tasks=5, + max_tasks=None, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, diff --git a/scripts/update_tensorboards.py b/scripts/update_tensorboards.py index 2edbddb..efd0875 100644 --- a/scripts/update_tensorboards.py +++ b/scripts/update_tensorboards.py @@ -3,7 +3,7 @@ import time import psutil -servers = [('aws1', 6006), ('aws5', 6007), ('aws6', 6008), ('aws7', 6009)] +servers = [('aws1', 6006), ('aws5', 6007), ('aws7', 6009)] while(True): diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 72beb50..7650436 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -453,7 +453,8 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject if self.last_scene is None or self.last_scene != ep["scene"]: self.last_scene = ep["scene"] - self.env.controller.reset(ep["scene"]) + # self.env.controller.reset(ep["scene"]) + self.env.reset(scene_name=ep["scene"]) # NOTE: not using ep["targetObjectIds"] due to floating points with # target objects moving. 
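The multiplier-style motion noise tuned above (effect_scale of 0.25 for training versus 0.1 for evaluation) can be pictured with a small standalone sketch; this is illustrative only, and the actual noise model applied in the environment code may differ.

import numpy as np

def noisy_ahead(nominal_ahead, multiplier_mean=1.0, multiplier_sigma=0.01, effect_scale=0.1):
    # Draw a multiplicative factor near 1.0 and scale its deviation by effect_scale,
    # so effect_scale=0 reproduces the nominal motion exactly.
    multiplier = np.random.normal(multiplier_mean, multiplier_sigma)
    return nominal_ahead * (1.0 + effect_scale * (multiplier - 1.0))

# e.g. a 0.2 m forward command with eval-time noise
perturbed_step = noisy_ahead(0.2, effect_scale=0.1)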
diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 299ace1..b68fc99 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -1,9 +1,13 @@ from ast import For from dis import dis +from email.encoders import encode_noop import random from typing import Any, Dict, List, Optional, Tuple, Union from typing_extensions import Literal +import signal +import time + import gym from importlib_metadata import Lookup import numpy as np @@ -32,6 +36,9 @@ from manipulathor_utils.debugger_util import ForkedPdb from datetime import datetime +def handler(signum, frame): + raise Exception("Controller call took too long") + class ObjectNavTask(Task[ManipulaTHOREnvironment]): _actions = ( MOVE_AHEAD, @@ -433,7 +440,8 @@ class StretchObjectNavTaskSegmentationSuccessActionFail(StretchObjectNavTaskSegm def __init__(self, **kwargs): super().__init__(**kwargs) self.recent_three_strikes = 0 - self.reward_config['got_stuck_penalty'] = -1.0 # TODO this is hacky + self.reward_config['got_stuck_penalty'] = 0.0 # TODO this is hacky + self.reward_config['failed_action_penalty'] = -0.25 # possible enhancement: add a "I'm stuck" action that agent is not penalized for def _step(self, action: int) -> RLStepResult: @@ -456,6 +464,25 @@ def _step(self, action: int) -> RLStepResult: info={"last_action_success": self.last_action_success, "action": action}, ) return step_result + + def judge(self) -> float: + """Judge the last event.""" + reward = self.reward_config['step_penalty'] + if not self.last_action_success: + reward += self.reward_config['failed_action_penalty'] + + reward += self.shaping() + + if self._took_end_action: + if self._success: + reward += self.reward_config['goal_success_reward'] + else: + reward += self.reward_config['failed_stop_reward'] + elif self.num_steps_taken() + 1 >= self.max_steps: + reward += self.reward_config['reached_horizon_reward'] + + self._rewards.append(float(reward)) + return float(reward) class ExploreWiseObjectNavTask(ObjectNavTask): @@ -502,6 +529,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.start_time = datetime.now() self.last_time = None + signal.signal(signal.SIGALRM, handler) def _step(self, action: int) -> RLStepResult: @@ -510,19 +538,31 @@ def _step(self, action: int) -> RLStepResult: self.manual_action = False - # print('Action Called', action_str) + # add/remove/adjust to allow graceful exit from auto-battlebots + end_ep_early = False + if self.num_steps_taken() % 10 == 0: + print('verify continue? set end_ep_early=True to gracefully fail out') + ForkedPdb().set_trace() + + self._last_action_str = action_str + action_dict = {"action": action_str} + signal.alarm(10) # second timeout - catch missed server connection. THIS IS NOT THREAD SAFE + try: + self.env.step(action_dict) + except: + print('continue to try again or set end_ep_early to fail out instead') + ForkedPdb().set_trace() + self.env.step(action_dict) - if action_str == "Done": + signal.alarm(0) + + if action_str == "Done" or end_ep_early: self._took_end_action = True dt_total = (datetime.now() - self.start_time).total_seconds()/60 print('I think I found a ', self.task_info['object_type'], ' after ', str(dt_total), ' minutes.' ) - print('Was I correct? Set self._success in trace.') + print('Was I correct? Set self._success in trace. 
Default false.') ForkedPdb().set_trace() - self._last_action_str = action_str - action_dict = {"action": action_str} - self.env.step(action_dict) - if self.last_time is not None: dt = (datetime.now() - self.last_time).total_seconds() print('FPS: ', str(1/dt)) From a944476a50d7ebd0f76a25bb1d5e75cb2632c533 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Wed, 29 Jun 2022 11:59:29 -0700 Subject: [PATCH 47/53] add task more like real with episode failure for repeated action failure --- .../procthor/obj_nav_2camera_procthor_wide.py | 4 ++-- ...obj_nav_2camera_procthor_wide_stochastic.py | 2 +- .../robothor/real_robothor_objectnav.py | 4 ++-- scripts/update_tensorboards.py | 2 +- .../all_rooms_object_nav_task_sampler.py | 18 +++++++++--------- .../stretch_utils/stretch_object_nav_tasks.py | 10 +++++----- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index c1ccdef..2c84589 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -26,8 +26,8 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( with open('datasets/objects/robothor_habitat2022.yaml', 'r') as f: OBJECT_TYPES=yaml.safe_load(f) - TASK_TYPE = StretchObjectNavTaskSegmentationSuccess - # TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail + # TASK_TYPE = StretchObjectNavTaskSegmentationSuccess + TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail SENSORS = [ RGBSensorThor( diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py index 20b6b20..7e111af 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py @@ -20,6 +20,6 @@ def __init__(self): self.ENV_ARGS['motion_noise_args'] = dict() self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] - self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 # .1 for eval .25 for training + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 # .1 for eval .25 for training. 
change to .1 for fine-tune with action penalties self.ENV_ARGS['returnToStart'] = False # for eval \ No newline at end of file diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py index 84551f2..642adae 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/real_robothor_objectnav.py @@ -63,8 +63,8 @@ class RealStretchObjectNav( MAX_STEPS = 60 - TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler #RealStretchDiverseBringObjectTaskSampler - TASK_TYPE = RealStretchObjectNavTask #RealStretchExploreWiseRewardTask + TASK_SAMPLER = RealStretchAllRoomsObjectNavTaskSampler + TASK_TYPE = RealStretchObjectNavTask # ENVIRONMENT_TYPE = StretchRealEnvironment # account for the super init VISUALIZE = True diff --git a/scripts/update_tensorboards.py b/scripts/update_tensorboards.py index efd0875..2edbddb 100644 --- a/scripts/update_tensorboards.py +++ b/scripts/update_tensorboards.py @@ -3,7 +3,7 @@ import time import psutil -servers = [('aws1', 6006), ('aws5', 6007), ('aws7', 6009)] +servers = [('aws1', 6006), ('aws5', 6007), ('aws6', 6008), ('aws7', 6009)] while(True): diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index 396d869..e11338e 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -464,15 +464,15 @@ def __init__(self, **kwargs) -> None: ] self.preset_easyish_tasks = [ - # {'object_id': 'Apple|1|1|1', 'object_type':"Apple"}, - # {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"}, - # {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"}, - # {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"}, - # {'object_id': 'Chair|1|1|1', 'object_type':"Chair"}, - # {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"}, - # {'object_id': 'Mug|1|1|1', 'object_type':"Mug"}, - # {'object_id': 'SprayBottle|1|1|1', 'object_type':"SprayBottle"}, - # {'object_id': 'Television|1|1|1', 'object_type':"Television"}, + {'object_id': 'Apple|1|1|1', 'object_type':"Apple"}, + {'object_id': 'BaseballBat|1|1|1', 'object_type':"BaseballBat"}, + {'object_id': 'BasketBall|1|1|1', 'object_type':"BasketBall"}, + {'object_id': 'Bowl|1|1|1', 'object_type':"Bowl"}, + {'object_id': 'Chair|1|1|1', 'object_type':"Chair"}, + {'object_id': 'HousePlant|1|1|1', 'object_type':"HousePlant"}, + {'object_id': 'Mug|1|1|1', 'object_type':"Mug"}, + {'object_id': 'SprayBottle|1|1|1', 'object_type':"SprayBottle"}, + {'object_id': 'Television|1|1|1', 'object_type':"Television"}, {'object_id': 'Vase|1|1|1', 'object_type':"Vase"} ] diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index b68fc99..86f4866 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -440,9 +440,9 @@ class StretchObjectNavTaskSegmentationSuccessActionFail(StretchObjectNavTaskSegm def __init__(self, **kwargs): super().__init__(**kwargs) self.recent_three_strikes = 0 - self.reward_config['got_stuck_penalty'] = 0.0 # TODO this is hacky - self.reward_config['failed_action_penalty'] = -0.25 - # possible enhancement: add a "I'm stuck" action that agent is not penalized for + 
self.reward_config['got_stuck_penalty'] = 0.0 # TODO this is hacky, should be in config + self.reward_config['failed_action_penalty'] = -0.2 + # possible enhancement: add a "I'm stuck" action? def _step(self, action: int) -> RLStepResult: sr = super()._step(action) @@ -546,11 +546,11 @@ def _step(self, action: int) -> RLStepResult: self._last_action_str = action_str action_dict = {"action": action_str} - signal.alarm(10) # second timeout - catch missed server connection. THIS IS NOT THREAD SAFE + signal.alarm(8) # second timeout - catch missed server connection. THIS IS NOT THREAD SAFE try: self.env.step(action_dict) except: - print('continue to try again or set end_ep_early to fail out instead') + print('Controller call took too long, continue to try again or set end_ep_early to fail out instead') ForkedPdb().set_trace() self.env.step(action_dict) From 58200802cbd334f9188dba46321f5addd744b8b3 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 30 Jun 2022 14:40:19 -0700 Subject: [PATCH 48/53] action failure multinode training run begin --- scripts/run_same_commands_servers.py | 4 ++++ scripts/update_tensorboards.py | 2 +- utils/stretch_utils/stretch_object_nav_tasks.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 6e6916b..6c3da3c 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -287,6 +287,10 @@ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' +command_aws1 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +--distributed_ip_and_port IP_ADR:6060 \ + --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ + --seed 10 --machine_id 0' # command = command_aws5 # command = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/no_pointnav_stretch_all_rooms_distrib \ diff --git a/scripts/update_tensorboards.py b/scripts/update_tensorboards.py index 2edbddb..efd0875 100644 --- a/scripts/update_tensorboards.py +++ b/scripts/update_tensorboards.py @@ -3,7 +3,7 @@ import time import psutil -servers = [('aws1', 6006), ('aws5', 6007), ('aws6', 6008), ('aws7', 6009)] +servers = [('aws1', 6006), ('aws5', 6007), ('aws7', 6009)] while(True): diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 86f4866..26bd86f 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -441,7 +441,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.recent_three_strikes = 0 self.reward_config['got_stuck_penalty'] = 0.0 # TODO this is hacky, should be in config - self.reward_config['failed_action_penalty'] = -0.2 + self.reward_config['failed_action_penalty'] = -0.5 # possible enhancement: add a "I'm stuck" action? 
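# Illustrative sketch, not part of the patch: the termination rule this class
# implements (end the episode after three consecutive failed actions, on top of
# the per-failure penalty above) can be summarized as follows; simplified from
# the _step override shown earlier, with a hypothetical helper name.
def should_end_for_repeated_failure(action_successes, max_consecutive_failures=3):
    # action_successes: per-step success flags for the episode so far.
    if len(action_successes) < max_consecutive_failures:
        return False
    return not any(action_successes[-max_consecutive_failures:])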
def _step(self, action: int) -> RLStepResult: From aaefafb7c6416a44797794f7689eac15b25b8435 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Thu, 7 Jul 2022 10:01:31 -0700 Subject: [PATCH 49/53] fix insufficiently strict action failure identifier --- .../test_robothor_procthorstyle_distrib.py | 2 +- scripts/run_same_commands_servers.py | 15 +++++++++++++-- scripts/update_tensorboards.py | 2 +- utils/stretch_utils/stretch_object_nav_tasks.py | 4 ++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py index 603c9f9..06910a9 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py @@ -18,7 +18,7 @@ class ObjectNavRoboTHORTestProcTHORstyleDistrib( ): NUM_PROCESSES = 56 # one them crashed for space? NUM_TRAIN_PROCESSES = 64 - NUM_VAL_PROCESSES = 4 + NUM_VAL_PROCESSES = 2 NUM_TEST_PROCESSES = 60 TRAIN_DEVICES = ( diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 6c3da3c..afb9f1d 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -287,10 +287,21 @@ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' -command_aws1 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-06_21-19-19/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000052491920.pt ~/; \ + ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0' + --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000052491920.pt ' + +# command_aws1 = ' ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0 ' + +# command_aws1 = './manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ +# --distributed_ip_and_port IP_ADR:6060 \ +# --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ +# --seed 10 --machine_id 0' # command = command_aws5 # command = 
'./manipulathor/scripts/kill-zombie.sh; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_bring_object_baselines/experiments/ithor/no_pointnav_stretch_all_rooms_distrib \ diff --git a/scripts/update_tensorboards.py b/scripts/update_tensorboards.py index efd0875..2d234ea 100644 --- a/scripts/update_tensorboards.py +++ b/scripts/update_tensorboards.py @@ -3,7 +3,7 @@ import time import psutil -servers = [('aws1', 6006), ('aws5', 6007), ('aws7', 6009)] +servers = [('aws1', 6006), ('aws5', 6007),('aws6', 6008)]#, ('aws7', 6009)] while(True): diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 26bd86f..755d683 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -342,8 +342,8 @@ def _step(self, action: int) -> RLStepResult: self.task_info["action_successes"].append(True) else: action_dict = {"action": action_str} - self.env.step(action_dict) - self.last_action_success = bool(self.env.last_event) + sr = self.env.step(action_dict) + self.last_action_success = bool(sr.metadata["lastActionSuccess"]) position = self.env.last_event.metadata["agent"]["position"] self.path.append(position) From e56eddb90fde32cc5eacda8862ca8acf7114ca6d Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 8 Jul 2022 17:10:46 -0700 Subject: [PATCH 50/53] basic color correct and viz for kinect cam --- scripts/run_same_commands_servers.py | 4 +-- .../stretch_utils/real_stretch_environment.py | 2 +- utils/stretch_utils/real_stretch_sensors.py | 26 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index afb9f1d..29cfd35 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -287,11 +287,11 @@ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' -command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-06_21-19-19/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000052491920.pt ~/; \ +command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-07_16-40-33/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000072781120.pt ~/; \ ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000052491920.pt ' + --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000072781120.pt ' # command_aws1 = ' ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact 
manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ # --distributed_ip_and_port IP_ADR:6060 \ diff --git a/utils/stretch_utils/real_stretch_environment.py b/utils/stretch_utils/real_stretch_environment.py index c7dd705..1d06982 100644 --- a/utils/stretch_utils/real_stretch_environment.py +++ b/utils/stretch_utils/real_stretch_environment.py @@ -39,7 +39,7 @@ def check_controller_version(self): def create_controller(self): - controller = ai2thor.robot_controller.Controller(host="stretch2.corp.ai2", port=9000, width=1280, height=720) + controller = ai2thor.robot_controller.Controller(host="stretch2", port=9000, width=1280, height=720) return controller def start( diff --git a/utils/stretch_utils/real_stretch_sensors.py b/utils/stretch_utils/real_stretch_sensors.py index 911dc12..c9a1410 100644 --- a/utils/stretch_utils/real_stretch_sensors.py +++ b/utils/stretch_utils/real_stretch_sensors.py @@ -40,6 +40,8 @@ from utils.detection_translator_util import THOR2COCO from utils.noise_in_motion_util import squeeze_bool_mask +from scipy.interpolate import UnivariateSpline + # from utils.real_stretch_utils import get_binary_mask_of_arm, get_mid_point_of_object_from_depth_and_mask from utils.stretch_utils.stretch_constants import INTEL_RESIZED_H, INTEL_RESIZED_W, KINECT_REAL_W, KINECT_REAL_H, \ MAX_INTEL_DEPTH, MIN_INTEL_DEPTH, MAX_KINECT_DEPTH, MIN_KINECT_DEPTH, INTEL_FOV_W, INTEL_FOV_H, KINECT_FOV_W, \ @@ -117,6 +119,22 @@ def normalize_real_kinect_image(frame,size=224): w,h = (int(current_size[0] * ratio), int(current_size[1] * ratio)) frame = cv2.resize(frame,(h,w)) + + spl = UnivariateSpline([0, 64, 128, 192, 255], [0, 30, 80, 160, 255]) + color_channel_decr = spl(range(256)) + + c_r, c_g, c_b = cv2.split(frame) + c_r = cv2.LUT(c_r, color_channel_decr).astype('uint8') + frame_mod = cv2.merge((c_r,c_g,c_b)) + hsv=cv2.cvtColor(frame_mod,cv2.COLOR_RGB2HSV).astype('int64') + hsv[:,:,2] = hsv[:,:,2] + 25 + hsv = np.clip(hsv,0,255).astype('uint8') + frame_bright = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) + + sidebyside=np.hstack((frame,frame_mod,frame_bright)) + frame=frame_bright + + # ForkedPdb().set_trace() if len(frame.shape) == 3: result = np.zeros((size, size, frame.shape[2])) elif len(frame.shape) == 2: @@ -131,6 +149,14 @@ def normalize_real_kinect_image(frame,size=224): result[result < MIN_KINECT_DEPTH] = 0 return result.astype(frame.dtype) + +""" +cv2.imshow('window2',cv2.cvtColor(sidebyside,cv2.COLOR_RGB2BGR)) +cv2.waitKey(0) +cv2.imshow('window',cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)) +cv2.waitKey(0) + +""" # class StretchDetectronObjectMask(Sensor): # def __init__(self, type: str,noise, source_camera, uuid: str = "object_mask", **kwargs: Any): # observation_space = gym.spaces.Box( From 5be3f085c27916601d79f65ab14b162328256ab8 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 12 Jul 2022 16:53:54 -0700 Subject: [PATCH 51/53] exploration reward and additional metrics implemented --- .../procthor/obj_nav_2camera_procthor_wide.py | 7 ++- ...bj_nav_2camera_procthor_wide_stochastic.py | 4 +- .../robothor/test_robothor_procthorstyle.py | 4 +- scripts/run_same_commands_servers.py | 4 +- .../procthor_object_nav_task_samplers.py | 1 + .../stretch_utils/stretch_object_nav_tasks.py | 59 ++++++++++++++++--- 6 files changed, 65 insertions(+), 14 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py 
b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index 2c84589..fda0098 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -8,7 +8,7 @@ from manipulathor_baselines.stretch_object_nav_baselines.experiments.procthor.obj_nav_for_procthor_clip_resnet50_rgb_only \ import ProcTHORObjectNavClipResnet50RGBOnly from utils.stretch_utils.stretch_object_nav_tasks import \ - StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail + StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail, ExploreWiseObjectNavTask from manipulathor_utils.debugger_util import ForkedPdb @@ -28,6 +28,7 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( # TASK_TYPE = StretchObjectNavTaskSegmentationSuccess TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail + # TASK_TYPE = ExploreWiseObjectNavTask SENSORS = [ RGBSensorThor( @@ -83,6 +84,10 @@ def __init__(self): self.WHICH_AGENT == 'stretch' # this only works for stretch and self.ENV_ARGS['allow_flipping'] == False # not with 2-camera ) + # self.REWARD_CONFIG['shaping_weight'] = 0.0 + self.REWARD_CONFIG['exploration_reward'] = 0.1 + self.REWARD_CONFIG['got_stuck_penalty'] = 0.0 + self.REWARD_CONFIG['failed_action_penalty'] = -0.5 @classmethod def tag(cls): diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py index 7e111af..c9b2c40 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide_stochastic.py @@ -20,6 +20,6 @@ def __init__(self): self.ENV_ARGS['motion_noise_args'] = dict() self.ENV_ARGS['motion_noise_args']['multiplier_means'] = [1,1,1,1,1,1] self.ENV_ARGS['motion_noise_args']['multiplier_sigmas'] = [0.01,0.01,0.01,0.01,0.01,0.01,0.01] - self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 # .1 for eval .25 for training. 
change to .1 for fine-tune with action penalties + self.ENV_ARGS['motion_noise_args']['effect_scale'] = .1 - self.ENV_ARGS['returnToStart'] = False # for eval \ No newline at end of file + self.ENV_ARGS['returnToStart'] = False # \ No newline at end of file diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py index cc883f5..964e652 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle.py @@ -57,9 +57,9 @@ def make_sampler_fn(cls, **kwargs): def valid_task_sampler_args(self, **kwargs): out = self._get_sampler_args_for_scene_split( - houses=self.EVAL_TASKS["validation"], + houses=self.EVAL_TASKS["validation"].shuffle(), mode="eval", - max_tasks=5, + max_tasks=10, allow_flipping=False, resample_same_scene_freq=self.RESAMPLE_SAME_SCENE_FREQ_IN_INFERENCE, # ignored **kwargs, diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 29cfd35..82c7598 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -287,11 +287,11 @@ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' -command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-07_16-40-33/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000072781120.pt ~/; \ +command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-12_16-52-21/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000160001536.pt ~/; \ ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000072781120.pt ' + --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000160001536.pt ' # command_aws1 = ' ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ # --distributed_ip_and_port IP_ADR:6060 \ diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index 7650436..c5c9947 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -76,6 +76,7 @@ def __init__( self.sampler_mode = kwargs["sampler_mode"] if self.sampler_mode != 
"train": self.rewards_config['shaping_weight'] = 0.0 + self.rewards_config['exploration_reward'] = 0.0 self.episode_index = 0 self.houses = houses diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 755d683..7886fa3 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -343,6 +343,9 @@ def _step(self, action: int) -> RLStepResult: else: action_dict = {"action": action_str} sr = self.env.step(action_dict) + # RH NOTE: this is an important change from procthor because of how the noise model + # is implemented. the env step return corresponds to the nominal/called + # action, which may or may not be the last thing the controller did. self.last_action_success = bool(sr.metadata["lastActionSuccess"]) position = self.env.last_event.metadata["agent"]["position"] @@ -440,9 +443,6 @@ class StretchObjectNavTaskSegmentationSuccessActionFail(StretchObjectNavTaskSegm def __init__(self, **kwargs): super().__init__(**kwargs) self.recent_three_strikes = 0 - self.reward_config['got_stuck_penalty'] = 0.0 # TODO this is hacky, should be in config - self.reward_config['failed_action_penalty'] = -0.5 - # possible enhancement: add a "I'm stuck" action? def _step(self, action: int) -> RLStepResult: sr = super()._step(action) @@ -485,17 +485,54 @@ def judge(self) -> float: return float(reward) -class ExploreWiseObjectNavTask(ObjectNavTask): + +class ExploreWiseObjectNavTask(StretchObjectNavTaskSegmentationSuccessActionFail): def __init__(self, **kwargs): super().__init__(**kwargs) all_locations = [[k['x'], k['y'], k['z']] for k in (self.env.get_reachable_positions())] self.all_reachable_positions = torch.Tensor(all_locations) self.has_visited = torch.zeros((len(self.all_reachable_positions), 1)) assert len(self.all_reachable_positions) > 0, 'no reachable positions to calculate reward' + + def calc_action_stat_metrics(self) -> Dict[str, Any]: + action_stat = { + "metric/action_stat/" + action_str: 0.0 for action_str in self._actions + } + action_success_stat = { + "metric/action_success/" + action_str: 0.0 for action_str in self._actions + } + action_success_stat["metric/action_success/total"] = 0.0 + + seq_len = len(self.task_info["taken_actions"]) + for (action_name, action_success) in list(zip( + self.task_info["taken_actions"], + self.task_info["action_successes"])): + action_stat["metric/action_stat/" + action_name] += 1.0 + action_success_stat[ + "metric/action_success/{}".format(action_name) + ] += action_success + action_success_stat["metric/action_success/total"] += action_success + + action_success_stat["metric/action_success/total"] /= seq_len + + for action_name in self._actions: + action_success_stat[ + "metric/" + "action_success/{}".format(action_name) + ] /= (action_stat["metric/action_stat/" + action_name] + 0.000001) + action_stat["metric/action_stat/" + action_name] /= seq_len + + result = {**action_stat, **action_success_stat} + return result def judge(self) -> float: """Compute the reward after having taken a step.""" - reward = self.reward_configs["step_penalty"] + reward = self.reward_config["step_penalty"] + + # additional scaling step penalty, thresholds at half max steps at the failed action penalty + reward += -0.5 * (1.05)**(np.min([-(self.max_steps/2)+self.num_steps_taken(),0])) + + if not self.last_action_success: + reward += self.reward_config['failed_action_penalty'] current_agent_location = self.env.get_agent_location() current_agent_location = 
torch.Tensor([current_agent_location['x'], current_agent_location['y'], current_agent_location['z']]) @@ -509,8 +546,8 @@ def judge(self) -> float: self.has_visited[location_index] = 1 if visited_new_place: - reward += self.reward_configs["exploration_reward"] - + reward += self.reward_config["exploration_reward"] + reward += self.shaping() if self._took_end_action: @@ -522,6 +559,14 @@ def judge(self) -> float: reward += self.reward_config['reached_horizon_reward'] self._rewards.append(float(reward)) return float(reward) + + def metrics(self) -> Dict[str, Any]: + result = super(ExploreWiseObjectNavTask, self).metrics() + + if self.is_done(): + result['percent_room_visited'] = self.has_visited.mean().item() + result = {**result, **self.calc_action_stat_metrics()} + return result class RealStretchObjectNavTask(StretchObjectNavTask): From 7c3c8c68ca0324962453fa3b8a0f8b0067ced815 Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Fri, 15 Jul 2022 16:33:34 -0700 Subject: [PATCH 52/53] move action stats upstream, reduce exploration reward and start multinode shaping -> exploration fnetune --- .../procthor/obj_nav_2camera_procthor_wide.py | 8 +-- scripts/run_same_commands_servers.py | 4 +- .../stretch_utils/stretch_object_nav_tasks.py | 67 ++++++++++--------- 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index fda0098..065f39a 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -27,8 +27,8 @@ class ProcTHORObjectNavClipResnet50RGBOnly2CameraWideFOV( OBJECT_TYPES=yaml.safe_load(f) # TASK_TYPE = StretchObjectNavTaskSegmentationSuccess - TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail - # TASK_TYPE = ExploreWiseObjectNavTask + # TASK_TYPE = StretchObjectNavTaskSegmentationSuccessActionFail + TASK_TYPE = ExploreWiseObjectNavTask SENSORS = [ RGBSensorThor( @@ -84,8 +84,8 @@ def __init__(self): self.WHICH_AGENT == 'stretch' # this only works for stretch and self.ENV_ARGS['allow_flipping'] == False # not with 2-camera ) - # self.REWARD_CONFIG['shaping_weight'] = 0.0 - self.REWARD_CONFIG['exploration_reward'] = 0.1 + self.REWARD_CONFIG['shaping_weight'] = 0.0 + self.REWARD_CONFIG['exploration_reward'] = 0.05 self.REWARD_CONFIG['got_stuck_penalty'] = 0.0 self.REWARD_CONFIG['failed_action_penalty'] = -0.5 diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 82c7598..792f46f 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -287,11 +287,11 @@ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' -command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-12_16-52-21/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000160001536.pt ~/; \ +command_aws1 = 'scp 
MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-14_17-36-17/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000198561968.pt ~/; \ ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000160001536.pt ' + --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000198561968.pt ' # command_aws1 = ' ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ # --distributed_ip_and_port IP_ADR:6060 \ diff --git a/utils/stretch_utils/stretch_object_nav_tasks.py b/utils/stretch_utils/stretch_object_nav_tasks.py index 7886fa3..13d9ef0 100644 --- a/utils/stretch_utils/stretch_object_nav_tasks.py +++ b/utils/stretch_utils/stretch_object_nav_tasks.py @@ -229,6 +229,38 @@ def _is_goal_in_range(self) -> bool: if obj["visible"] and obj["objectType"] == self.task_info["object_type"] and self.dist_to_target_func() < self.task_info['success_distance'] ) + + def calc_action_stat_metrics(self) -> Dict[str, Any]: + action_stat = { + "metric/action_stat/" + action_str: 0.0 for action_str in self._actions + } + action_success_stat = { + "metric/action_success/" + action_str: 0.0 for action_str in self._actions + } + action_success_stat["metric/action_success/total"] = 0.0 + + seq_len = len(self.task_info["taken_actions"]) + + for (action_name, action_success) in list(zip( + self.task_info["taken_actions"], + self.task_info["action_successes"])): + action_stat["metric/action_stat/" + action_name] += 1.0 + + action_success_stat[ + "metric/action_success/{}".format(action_name) + ] += action_success + action_success_stat["metric/action_success/total"] += action_success + + action_success_stat["metric/action_success/total"] /= seq_len + + for action_name in self._actions: + action_success_stat[ + "metric/" + "action_success/{}".format(action_name) + ] /= (action_stat["metric/action_stat/" + action_name] + 0.000001) + action_stat["metric/action_stat/" + action_name] /= seq_len + + result = {**action_stat, **action_success_stat} + return result def shaping(self) -> float: if self.reward_config['shaping_weight'] == 0.0: @@ -307,6 +339,7 @@ def metrics(self) -> Dict[str, Any]: if self.is_done(): result={} # placeholder for future metrics = super().metrics() + metrics = {**metrics, **self.calc_action_stat_metrics()} metrics["dist_to_target"] = self.dist_to_target_func() metrics["total_reward"] = np.sum(self._rewards) metrics["spl"] = spl_metric( @@ -493,43 +526,13 @@ def __init__(self, **kwargs): self.all_reachable_positions = torch.Tensor(all_locations) self.has_visited = torch.zeros((len(self.all_reachable_positions), 1)) assert len(self.all_reachable_positions) > 0, 'no reachable positions to calculate reward' - - def calc_action_stat_metrics(self) -> Dict[str, Any]: - action_stat = 
{ - "metric/action_stat/" + action_str: 0.0 for action_str in self._actions - } - action_success_stat = { - "metric/action_success/" + action_str: 0.0 for action_str in self._actions - } - action_success_stat["metric/action_success/total"] = 0.0 - - seq_len = len(self.task_info["taken_actions"]) - for (action_name, action_success) in list(zip( - self.task_info["taken_actions"], - self.task_info["action_successes"])): - action_stat["metric/action_stat/" + action_name] += 1.0 - action_success_stat[ - "metric/action_success/{}".format(action_name) - ] += action_success - action_success_stat["metric/action_success/total"] += action_success - - action_success_stat["metric/action_success/total"] /= seq_len - - for action_name in self._actions: - action_success_stat[ - "metric/" + "action_success/{}".format(action_name) - ] /= (action_stat["metric/action_stat/" + action_name] + 0.000001) - action_stat["metric/action_stat/" + action_name] /= seq_len - - result = {**action_stat, **action_success_stat} - return result def judge(self) -> float: """Compute the reward after having taken a step.""" reward = self.reward_config["step_penalty"] # additional scaling step penalty, thresholds at half max steps at the failed action penalty - reward += -0.5 * (1.05)**(np.min([-(self.max_steps/2)+self.num_steps_taken(),0])) + reward += -0.2 * (1.1)**(np.min([-(self.max_steps/2)+self.num_steps_taken(),0])) if not self.last_action_success: reward += self.reward_config['failed_action_penalty'] @@ -565,7 +568,7 @@ def metrics(self) -> Dict[str, Any]: if self.is_done(): result['percent_room_visited'] = self.has_visited.mean().item() - result = {**result, **self.calc_action_stat_metrics()} + result['new_locations_visited'] = self.has_visited.sum().item() return result From 10a4c17e79f21af6aa6fd1695b158ac6fd2fadad Mon Sep 17 00:00:00 2001 From: roseh-ai2 Date: Tue, 19 Jul 2022 11:32:22 -0700 Subject: [PATCH 53/53] fine-tuning in robothor with lighting randomization and exploration --- .../ithor/obj_nav_2camera_ithor_wide.py | 41 +++++---- .../procthor/obj_nav_2camera_procthor_wide.py | 2 +- .../obj_nav_2camera_robothor_narrow.py | 86 ++++++++++--------- scripts/run_same_commands_servers.py | 6 +- .../procthor_object_nav_task_samplers.py | 1 + .../all_rooms_object_nav_task_sampler.py | 7 ++ 6 files changed, 81 insertions(+), 62 deletions(-) diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py index e0a7d5a..396f0a0 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/ithor/obj_nav_2camera_ithor_wide.py @@ -78,24 +78,24 @@ class ithorObjectNavClipResnet50RGBOnly2CameraWideFOV( if platform.system() == "Darwin": MAX_STEPS = 100 - SENSORS += [ #TODO FIX ORDER HERE - RGBSensorStretchKinectBigFov( - height=ObjectNavBaseConfig.SCREEN_SIZE, - width=ObjectNavBaseConfig.SCREEN_SIZE, - use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres_arm_only_viz", - ), - RGBSensorThor( - height=ObjectNavBaseConfig.SCREEN_SIZE, - width=ObjectNavBaseConfig.SCREEN_SIZE, - use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres_only_viz", - ), - ] + # SENSORS += [ #TODO FIX ORDER HERE + # 
RGBSensorStretchKinectBigFov( + # height=ObjectNavBaseConfig.SCREEN_SIZE, + # width=ObjectNavBaseConfig.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm_only_viz", + # ), + # RGBSensorThor( + # height=ObjectNavBaseConfig.SCREEN_SIZE, + # width=ObjectNavBaseConfig.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_only_viz", + # ), + # ] TASK_SAMPLER = AllRoomsObjectNavTaskSampler TASK_TYPE = StretchObjectNavTask @@ -118,6 +118,11 @@ def __init__(self): clip_model_type=self.CLIP_MODEL_TYPE, screen_size=self.SCREEN_SIZE, ) + self.REWARD_CONFIG['shaping_weight'] = 0.0 + self.REWARD_CONFIG['exploration_reward'] = 0.05 + self.REWARD_CONFIG['got_stuck_penalty'] = 0.0 + self.REWARD_CONFIG['failed_action_penalty'] = -0.5 + self.ENV_ARGS['renderInstanceSegmentation'] = True def preprocessors(self) -> Sequence[Union[Preprocessor, Builder[Preprocessor]]]: return self.preprocessing_and_model.preprocessors() diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py index 065f39a..f25bf23 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/procthor/obj_nav_2camera_procthor_wide.py @@ -85,7 +85,7 @@ def __init__(self): and self.ENV_ARGS['allow_flipping'] == False # not with 2-camera ) self.REWARD_CONFIG['shaping_weight'] = 0.0 - self.REWARD_CONFIG['exploration_reward'] = 0.05 + self.REWARD_CONFIG['exploration_reward'] = 0.1 self.REWARD_CONFIG['got_stuck_penalty'] = 0.0 self.REWARD_CONFIG['failed_action_penalty'] = -0.5 diff --git a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py index 41c5f00..763bddd 100644 --- a/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py +++ b/manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/obj_nav_2camera_robothor_narrow.py @@ -9,6 +9,9 @@ ithorObjectNavClipResnet50RGBOnly2CameraWideFOV from allenact_plugins.clip_plugin.clip_preprocessors import ClipResNetPreprocessor +from utils.stretch_utils.stretch_object_nav_tasks import \ + StretchObjectNavTaskSegmentationSuccess, StretchObjectNavTaskSegmentationSuccessActionFail, ExploreWiseObjectNavTask + from scripts.dataset_generation.find_categories_to_use import FULL_LIST_OF_OBJECTS, ROBOTHOR_TRAIN, ROBOTHOR_VAL @@ -30,47 +33,50 @@ class RobothorObjectNavClipResnet50RGBOnly2CameraNarrowFOV( random.shuffle(TRAIN_SCENES) random.shuffle(TEST_SCENES) - SENSORS = [ - RGBSensorStretchIntel( - height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres", - ), - RGBSensorStretchKinect( - height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - 
use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres_arm", - ), - GoalObjectTypeThorSensor( - object_types=OBJECT_TYPES, - ), - ] + # SENSORS = [ + # RGBSensorStretchIntel( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres", + # ), + # RGBSensorStretchKinect( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm", + # ), + # GoalObjectTypeThorSensor( + # object_types=OBJECT_TYPES, + # ), + # ] + + TASK_TYPE = ExploreWiseObjectNavTask + MAX_STEPS = 200 - if platform.system() == "Darwin": - SENSORS += [ - RGBSensorStretchKinect( - height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres_arm_only_viz", - ), - RGBSensorStretchIntel( - height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, - use_resnet_normalization=True, - mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, - stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, - uuid="rgb_lowres_only_viz", - ), - ] + # if platform.system() == "Darwin": + # SENSORS += [ + # RGBSensorStretchKinect( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_arm_only_viz", + # ), + # RGBSensorStretchIntel( + # height=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # width=ithorObjectNavClipResnet50RGBOnly2CameraWideFOV.SCREEN_SIZE, + # use_resnet_normalization=True, + # mean=ClipResNetPreprocessor.CLIP_RGB_MEANS, + # stdev=ClipResNetPreprocessor.CLIP_RGB_STDS, + # uuid="rgb_lowres_only_viz", + # ), + # ] diff --git a/scripts/run_same_commands_servers.py b/scripts/run_same_commands_servers.py index 792f46f..9d1e810 100644 --- a/scripts/run_same_commands_servers.py +++ b/scripts/run_same_commands_servers.py @@ -287,11 +287,11 @@ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ --seed 10 --machine_id 0 -c ~/exp_StretchNeckedObjectNavTaskUpdateOrder-RGB-SingleCam-ProcTHOR-locobot-RoboTHOR-Test__stage_01__steps_000014017504.pt ' -command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-14_17-36-17/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000198561968.pt ~/; \ +command_aws1 = 'scp MAIN_SERVER:~/manipulathor/experiment_output/checkpoints/ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test/2022-07-18_15-49-34/exp_ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000273645744.pt ~/; \ 
./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ --distributed_ip_and_port IP_ADR:6060 \ --config_kwargs \'{\\"distributed_nodes\\":NUM_MACHINES}\' \ - --seed 10 --machine_id 0 -c ~/exp_StretchObjectNavTaskSegmentationSuccessActionFail-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000198561968.pt ' + --seed 10 --machine_id 0 -c ~/exp_ExploreWiseObjectNavTask-RGB-2Camera-ProcTHOR-stretch-RoboTHOR-Test__stage_02__steps_000273645744.pt ' # command_aws1 = ' ./manipulathor/scripts/kill-zombie.sh; ai2thor-xorg start; cd manipulathor && export PYTHONPATH="./" && allenact manipulathor_baselines/stretch_object_nav_baselines/experiments/robothor/test_robothor_procthorstyle_distrib.py \ # --distributed_ip_and_port IP_ADR:6060 \ @@ -314,7 +314,7 @@ server_sets = { 'aws1':{ 'servers':[f'aws{i}' for i in range(1,5)], - 'ip_adr': '18.237.24.199', + 'ip_adr': '52.13.80.54', 'command': command_aws1, }, 'aws5': { diff --git a/utils/procthor_utils/procthor_object_nav_task_samplers.py b/utils/procthor_utils/procthor_object_nav_task_samplers.py index c5c9947..907aae6 100644 --- a/utils/procthor_utils/procthor_object_nav_task_samplers.py +++ b/utils/procthor_utils/procthor_object_nav_task_samplers.py @@ -370,6 +370,7 @@ def next_task(self, force_advance_scene: bool = False) -> Optional[StretchObject if random.random() < self.env_args['p_randomize_material']: self.env.controller.step(action="RandomizeMaterials", raise_for_failure=True) + self.env.controller.step(action="RandomizeLighting", synchronized=True, raise_for_failure=True) else: self.env.controller.step(action="ResetMaterials", raise_for_failure=True) diff --git a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py index e11338e..cc467cf 100644 --- a/utils/stretch_utils/all_rooms_object_nav_task_sampler.py +++ b/utils/stretch_utils/all_rooms_object_nav_task_sampler.py @@ -279,6 +279,13 @@ def put_object_in_location(location_point): # if not event_transport_init_obj.metadata['lastActionSuccess']: # print('scene', scene_name, 'init', init_object['object_id']) # print('ERROR: one of transfers fail', 'init', event_transport_init_obj.metadata['errorMessage']) + + if random.random() < 0.8: + # self.env.controller.step(action="RandomizeMaterials", raise_for_failure=True) + self.env.controller.step(action="RandomizeLighting", synchronized=True, raise_for_failure=True) + # else: + # self.env.controller.step(action="ResetMaterials", raise_for_failure=True) + else: #Object is already at the location it should be target_obj_type = data_point['target_obj_type']
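The reward shaping that patches 48 through 53 converge on (a failed-action penalty, a one-time exploration bonus per reachable position, and a step penalty that ramps up after half the episode) is spread across several hunks above. The following is a minimal, standalone sketch of that combined logic; the function signature, the plain-dict `reward_config`, and the tensor bookkeeping are illustrative assumptions rather than the repository's actual `ExploreWiseObjectNavTask.judge` interface.

```python
import numpy as np
import torch

def explore_wise_reward(reward_config, max_steps, steps_taken,
                        last_action_success, agent_xyz,
                        all_reachable_positions, has_visited):
    """One step of the exploration-wise reward: base step penalty, ramped
    time penalty, failed-action penalty, and a per-position exploration bonus."""
    reward = reward_config["step_penalty"]

    # Scaled step penalty (patch 52 values): stays near zero early in the
    # episode and approaches -0.2 once more than half of max_steps is used.
    reward += -0.2 * (1.1) ** np.min([steps_taken - max_steps / 2, 0])

    # Penalize actions the controller reports as failed (patches 48/49).
    if not last_action_success:
        reward += reward_config["failed_action_penalty"]

    # Exploration bonus: pay out once for the first visit to each reachable
    # position; has_visited is a per-episode 0/1 tensor over those positions.
    location = torch.tensor(agent_xyz, dtype=torch.float32)
    idx = torch.argmin(
        torch.linalg.norm(all_reachable_positions - location, dim=-1)
    ).item()
    if has_visited[idx] == 0:
        reward += reward_config["exploration_reward"]
    has_visited[idx] = 1

    return float(reward)
```

In the task itself, `all_reachable_positions` comes from `get_reachable_positions()` at episode start and `has_visited` is a zero tensor of matching length, so the bonus is paid at most once per grid cell per episode.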
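Patch 52 also moves the per-action statistics upstream so every task variant reports them. A compact sketch of what `calc_action_stat_metrics` computes is below; the helper name and argument names are mine, and the small epsilon mirrors the `0.000001` used in the diff to avoid dividing by zero for actions that were never taken.

```python
from typing import Dict, List

def action_stats(actions: List[str],
                 taken: List[str],
                 successes: List[bool]) -> Dict[str, float]:
    """Per-action frequency and success rate over one episode."""
    counts = {a: 0.0 for a in actions}
    succ = {a: 0.0 for a in actions}
    for name, ok in zip(taken, successes):
        counts[name] += 1.0
        succ[name] += float(ok)

    seq_len = max(len(taken), 1)  # guard against empty episodes (assumption)
    out: Dict[str, float] = {}
    for a in actions:
        out[f"metric/action_success/{a}"] = succ[a] / (counts[a] + 1e-6)
        out[f"metric/action_stat/{a}"] = counts[a] / seq_len
    out["metric/action_success/total"] = sum(succ.values()) / seq_len
    return out
```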
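Finally, the Kinect color correction introduced in patch 50 is easier to read outside the diff: a spline-based lookup table darkens the red channel, then brightness is raised in HSV space before the frame is resized and padded. The sketch below restates just that color step; the function name is an assumption, and the output clipping on the LUT is added here for safety rather than taken from the patch.

```python
import cv2
import numpy as np
from scipy.interpolate import UnivariateSpline

def color_correct_kinect_frame(frame: np.ndarray) -> np.ndarray:
    """Red-channel LUT plus HSV brightness boost for an RGB uint8 frame."""
    # Spline through the same control points used in the patch; clip to the
    # valid 8-bit range in case the spline overshoots at the ends.
    spl = UnivariateSpline([0, 64, 128, 192, 255], [0, 30, 80, 160, 255])
    lut = np.clip(spl(range(256)), 0, 255)

    r, g, b = cv2.split(frame)          # frame is RGB, so channel 0 is red
    r = cv2.LUT(r, lut).astype("uint8")
    corrected = cv2.merge((r, g, b))

    # Raise the value channel by 25 in HSV space, then convert back to RGB.
    hsv = cv2.cvtColor(corrected, cv2.COLOR_RGB2HSV).astype("int64")
    hsv[:, :, 2] = np.clip(hsv[:, :, 2] + 25, 0, 255)
    return cv2.cvtColor(hsv.astype("uint8"), cv2.COLOR_HSV2RGB)
```

The commented-out `cv2.imshow` snippet left in the patch suggests this was tuned by eye against a side-by-side of the raw, LUT-corrected, and brightened frames.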