
Comparing changes

Choose two branches to see what's changed or to start a new pull request.
base repository: openai/baselines (base: master)
head repository: madras-simulator/baselines (compare: master)

Can't automatically merge. Don't worry, you can still create the pull request.
  • 10 commits
  • 3 files changed
  • 2 contributors

Commits on Oct 28, 2018

  1. Supports for Madras Env

    buridiaditya committed Oct 28, 2018

    Commit: 67df72f

Commits on Nov 29, 2018

  1. Merge pull request #2 from openai/master

    Updating the Fork
    rudrasohan authored Nov 29, 2018
    Commit: a75102a

Commits on Nov 30, 2018

  1. Commit: 2d5593f
  2. Commit: bba5cfc
  3. fixed typos

    rudrasohan committed Nov 30, 2018
    Commit: 3e28aee
  4. fixed typos 1

    rudrasohan committed Nov 30, 2018
    Commit: f66db73

Commits on Dec 1, 2018

  1. fixed loader

    rudrasohan committed Dec 1, 2018
    Commit: 44e1b53
  2. prevent file overwriting

    rudrasohan committed Dec 1, 2018
    Commit: a33f0e1
  3. removed specific path

    rudrasohan committed Dec 1, 2018
    Commit: 0ddec01

Commits on Dec 5, 2018

  1. Merge pull request #3 from rudrasohan/comp

    Added Saving functionality.
    buridiaditya authored Dec 5, 2018
    Commit: 198bbed

Showing with 25 additions and 5 deletions.
  1. +17 −3 baselines/ddpg/ddpg.py
  2. +6 −0 baselines/ddpg/ddpg_learner.py
  3. +2 −2 baselines/run.py
20 changes: 17 additions & 3 deletions baselines/ddpg/ddpg.py
@@ -42,8 +42,12 @@ def learn(network, env,
           tau=0.01,
           eval_env=None,
           param_noise_adaption_interval=50,
+          load_path = None,
+          save_path = '<specify/path>',
           **network_kwargs):
 
+    print("Save PATH;{}".format(save_path))
+    print("Load PATH;{}".format(load_path))
     set_global_seeds(seed)
 
     if total_timesteps is not None:
@@ -58,8 +62,7 @@ def learn(network, env,
     rank = 0
 
     nb_actions = env.action_space.shape[-1]
-    assert (np.abs(env.action_space.low) == env.action_space.high).all()  # we assume symmetric actions.
-
+    #assert (np.abs(env.action_space.low) == env.action_space.high).all()  # we assume symmetric actions.
     memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape)
     critic = Critic(network=network, **network_kwargs)
     actor = Actor(nb_actions, network=network, **network_kwargs)
@@ -91,14 +94,19 @@ def learn(network, env,
         batch_size=batch_size, action_noise=action_noise, param_noise=param_noise, critic_l2_reg=critic_l2_reg,
         actor_lr=actor_lr, critic_lr=critic_lr, enable_popart=popart, clip_norm=clip_norm,
         reward_scale=reward_scale)
+
     logger.info('Using agent with the following configuration:')
     logger.info(str(agent.__dict__.items()))
 
     eval_episode_rewards_history = deque(maxlen=100)
     episode_rewards_history = deque(maxlen=100)
-    sess = U.get_session()
     # Prepare everything.
+    sess = U.get_session()
     agent.initialize(sess)
+    checkpoint_num = 0
+    if load_path is not None:
+        agent.load(load_path)
+        checkpoint_num = int(os.path.split(load_path)[1]) + 1
     sess.graph.finalize()
 
     agent.reset()
@@ -124,6 +132,8 @@ def learn(network, env,
     epoch_actions = []
     epoch_qs = []
     epoch_episodes = 0
+    if load_path is None:
+        os.makedirs(save_path, exist_ok=True)
     for epoch in range(nb_epochs):
         for cycle in range(nb_epoch_cycles):
             # Perform rollouts.
@@ -269,5 +279,9 @@ def as_scalar(x):
                 with open(os.path.join(logdir, 'eval_env_state.pkl'), 'wb') as f:
                     pickle.dump(eval_env.get_state(), f)
 
+        savepath = os.path.join(save_path, str(epoch+checkpoint_num))
+        print('Saving to ', savepath)
+        agent.save(savepath)
+
 
     return agent
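
Taken together, the ddpg.py changes give learn() resumable checkpointing: a fresh run creates save_path and writes one checkpoint per epoch, named by its index, while a load_path pointing at an earlier checkpoint restores the weights and continues numbering from the next index. A minimal sketch of how a caller might drive the new arguments (the environment id and paths are illustrative assumptions, not taken from the diff):

    # Hedged sketch: using the load_path/save_path arguments added above.
    # 'gym-madras-v0' and the /tmp paths are placeholders, not from the PR.
    import gym
    from baselines.ddpg.ddpg import learn

    env = gym.make('gym-madras-v0')  # assumes the MADRaS package registered this id

    # Fresh run: checkpoints land in /tmp/ddpg_ckpts/0, /tmp/ddpg_ckpts/1, ...
    agent = learn(network='mlp', env=env, save_path='/tmp/ddpg_ckpts')

    # Resume: restores checkpoint 4, then saves from index 5 onward, because
    # checkpoint_num = int(os.path.split(load_path)[1]) + 1.
    agent = learn(network='mlp', env=env,
                  load_path='/tmp/ddpg_ckpts/4',
                  save_path='/tmp/ddpg_ckpts')
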
6 changes: 6 additions & 0 deletions baselines/ddpg/ddpg_learner.py
@@ -1,6 +1,7 @@
 from copy import copy
 from functools import reduce
 
+import functools
 import numpy as np
 import tensorflow as tf
 import tensorflow.contrib as tc
@@ -9,6 +10,7 @@
 from baselines.common.mpi_adam import MpiAdam
 import baselines.common.tf_util as U
 from baselines.common.mpi_running_mean_std import RunningMeanStd
+from baselines.common.tf_util import save_variables, load_variables
 try:
     from mpi4py import MPI
 except ImportError:
@@ -98,6 +100,8 @@ def __init__(self, actor, critic, memory, observation_shape, action_shape, param
         self.batch_size = batch_size
         self.stats_sample = None
         self.critic_l2_reg = critic_l2_reg
+        self.save = None
+        self.load = None
 
         # Observation normalization.
         if self.normalize_observations:
@@ -333,6 +337,8 @@ def train(self):
     def initialize(self, sess):
         self.sess = sess
         self.sess.run(tf.global_variables_initializer())
+        self.save = functools.partial(save_variables, sess=self.sess)
+        self.load = functools.partial(load_variables, sess=self.sess)
         self.actor_optimizer.sync()
         self.critic_optimizer.sync()
         self.sess.run(self.target_init_updates)
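
The new save/load hooks are built by currying the live TensorFlow session into baselines' generic save_variables/load_variables helpers with functools.partial, so later callers (such as agent.save(savepath) in ddpg.py) never need to pass a session. A self-contained sketch of the same pattern; the helper body below is a stand-in, not the tf_util implementation:

    # Hedged sketch of the functools.partial binding done in initialize().
    import functools
    import tensorflow as tf

    def save_variables(save_path, variables=None, sess=None):
        # Stand-in for baselines.common.tf_util.save_variables.
        saver = tf.train.Saver(variables or tf.global_variables())
        saver.save(sess, save_path)

    x = tf.Variable(0.0, name='x')  # dummy variable so there is something to save
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    save = functools.partial(save_variables, sess=sess)  # sess is pre-bound here
    save('/tmp/model')  # equivalent to save_variables('/tmp/model', sess=sess)
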
4 changes: 2 additions & 2 deletions baselines/run.py
@@ -5,7 +5,7 @@
 from collections import defaultdict
 import tensorflow as tf
 import numpy as np
-
+import MADRaS
 from baselines.common.vec_env.vec_video_recorder import VecVideoRecorder
 from baselines.common.vec_env.vec_frame_stack import VecFrameStack
 from baselines.common.cmd_util import common_arg_parser, parse_unknown_args, make_vec_env, make_env
@@ -50,7 +50,7 @@
     'SpaceInvaders-Snes',
 }
 
-
+_game_envs['madras'] = {'gym-torcs-v0','gym-madras-v0'}
 def train(args, extra_args):
     env_type, env_id = get_env_type(args.env)
     print('env_type: {}'.format(env_type))
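
import MADRaS is there purely for its side effect of registering the simulator's gym ids, and the _game_envs['madras'] entry lets run.py's env-type lookup resolve them, so an invocation along the lines of python -m baselines.run --alg=ddpg --env=gym-madras-v0 should route to the new environments. A simplified sketch of that lookup (trimmed to the new entry; the real get_env_type in run.py handles more cases):

    # Hedged sketch of how run.py resolves an env id to an env type.
    _game_envs = {'madras': {'gym-torcs-v0', 'gym-madras-v0'}}

    def get_env_type(env_id):
        for env_type, ids in _game_envs.items():
            if env_id in ids:
                return env_type, env_id
        raise ValueError('Unknown env id: {}'.format(env_id))

    print(get_env_type('gym-madras-v0'))  # -> ('madras', 'gym-madras-v0')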