diff --git a/.gitignore b/.gitignore
index 50bcf5450..c0ace1dc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ roboschool
 *.orig
 docs/site
 coach_env
+venv
 build
 rl_coach.egg*
 rl_coach_slim.egg*
@@ -32,4 +33,3 @@ trace_test*
 .cache/
 *.pyc
 coachenv
-
diff --git a/rl_coach/agents/agent.py b/rl_coach/agents/agent.py
index bbd9004cc..f497e1476 100644
--- a/rl_coach/agents/agent.py
+++ b/rl_coach/agents/agent.py
@@ -895,10 +895,7 @@ def observe(self, env_response: EnvResponse) -> bool:
             transition = self.update_transition_before_adding_to_replay_buffer(transition)
 
             # merge the intrinsic reward in
-            if self.ap.algorithm.scale_external_reward_by_intrinsic_reward_value:
-                transition.reward = transition.reward * (1 + self.last_action_info.action_intrinsic_reward)
-            else:
-                transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
+            transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward
 
             # sum up the total shaped reward
             self.total_shaped_reward_in_current_episode += transition.reward
@@ -1026,7 +1023,7 @@ def emulate_observe_on_trainer(self, transition: Transition) -> bool:
         self.total_reward_in_current_episode += transition.reward
         self.shaped_reward.add_sample(transition.reward)
         self.reward.add_sample(transition.reward)
-        
+
         # create and store the transition
         if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]:
             # for episodic memories we keep the transitions in a local buffer until the episode is ended.
diff --git a/rl_coach/base_parameters.py b/rl_coach/base_parameters.py
index e462e2b5d..815a78a2d 100644
--- a/rl_coach/base_parameters.py
+++ b/rl_coach/base_parameters.py
@@ -200,9 +200,6 @@ def __init__(self):
 
         # distributed agents params
         self.share_statistics_between_workers = True
 
-        # intrinsic reward
-        self.scale_external_reward_by_intrinsic_reward_value = False
-
         # n-step returns
         self.n_step = -1  # calculate the total return (no bootstrap, by default)
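
For reference, the agent.py and base_parameters.py hunks above remove the scale_external_reward_by_intrinsic_reward_value algorithm parameter, so the intrinsic reward carried in last_action_info is now always added to the transition reward instead of optionally scaling it. The following is a minimal, standalone sketch of the before/after merge logic; it uses hypothetical stand-in containers rather than the real rl_coach Transition and ActionInfo classes:

# Minimal sketch of the reward-merge change; FakeTransition and FakeActionInfo
# are stand-ins for illustration, not the actual rl_coach classes.

class FakeTransition:
    def __init__(self, reward):
        self.reward = reward

class FakeActionInfo:
    def __init__(self, action_intrinsic_reward):
        self.action_intrinsic_reward = action_intrinsic_reward

def merge_intrinsic_reward_before(transition, last_action_info, scale_flag):
    # Old behavior: the now-removed flag chose between scaling and adding.
    if scale_flag:
        transition.reward = transition.reward * (1 + last_action_info.action_intrinsic_reward)
    else:
        transition.reward = transition.reward + last_action_info.action_intrinsic_reward
    return transition

def merge_intrinsic_reward_after(transition, last_action_info):
    # New behavior: the intrinsic reward is always added to the external reward.
    transition.reward = transition.reward + last_action_info.action_intrinsic_reward
    return transition

if __name__ == "__main__":
    t = merge_intrinsic_reward_after(FakeTransition(reward=1.0),
                                     FakeActionInfo(action_intrinsic_reward=0.5))
    print(t.reward)  # 1.5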