Practical Reinforcement Learning Week4
jiadaizhao committed Jan 7, 2019
1 parent 95c4c02 commit e47da9e
Showing 15 changed files with 1,761 additions and 0 deletions.
3 binary files changed (contents not shown).
59 changes: 59 additions & 0 deletions Practical Reinforcement Learning/Week4_approx/atari_util.py
@@ -0,0 +1,59 @@
"""Auxilary files for those who wanted to solve breakout with CEM or policy gradient"""
import numpy as np
import gym
from scipy.misc import imresize
from gym.core import Wrapper
from gym.spaces.box import Box

class PreprocessAtari(Wrapper):
def __init__(self, env, height=42, width=42, color=False, crop=lambda img: img,
n_frames=4, dim_order='theano', reward_scale=1,):
"""A gym wrapper that reshapes, crops and scales image into the desired shapes"""
super(PreprocessAtari, self).__init__(env)
assert dim_order in ('theano', 'tensorflow')
self.img_size = (height, width)
self.crop=crop
self.color=color
self.dim_order = dim_order
self.reward_scale = reward_scale

n_channels = (3 * n_frames) if color else n_frames
obs_shape = [n_channels,height,width] if dim_order == 'theano' else [height,width,n_channels]
self.observation_space = Box(0.0, 1.0, obs_shape)
self.framebuffer = np.zeros(obs_shape, 'float32')

def reset(self):
"""resets breakout, returns initial frames"""
self.framebuffer = np.zeros_like(self.framebuffer)
self.update_buffer(self.env.reset())
return self.framebuffer

def step(self,action):
"""plays breakout for 1 step, returns frame buffer"""
new_img, reward, done, info = self.env.step(action)
self.update_buffer(new_img)
return self.framebuffer, reward * self.reward_scale, done, info

### image processing ###

def update_buffer(self,img):
img = self.preproc_image(img)
offset = 3 if self.color else 1
if self.dim_order == 'theano':
axis = 0
cropped_framebuffer = self.framebuffer[:-offset]
else:
axis = -1
cropped_framebuffer = self.framebuffer[:,:,:-offset]
self.framebuffer = np.concatenate([img, cropped_framebuffer], axis = axis)

def preproc_image(self, img):
"""what happens to the observation"""
img = self.crop(img)
img = imresize(img, self.img_size)
if not self.color:
img = img.mean(-1, keepdims=True)
if self.dim_order == 'theano':
img = img.transpose([2,0,1]) # [h, w, c] to [c, h, w]
img = img.astype('float32') / 255.
return img
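
A minimal usage sketch, not part of this commit: how PreprocessAtari might wrap a Breakout environment. The env id and crop function below are illustrative assumptions.

# Hypothetical usage sketch; the env id and crop are illustrative assumptions.
import gym

def make_env():
    env = gym.make("BreakoutDeterministic-v4")
    env = PreprocessAtari(env, height=42, width=42, color=False,
                          crop=lambda img: img[60:-30, 5:-5],  # assumed crop of the playing field
                          n_frames=4, dim_order='tensorflow')
    return env

env = make_env()
obs = env.reset()
print(obs.shape)  # (42, 42, 4) with dim_order='tensorflow' and color=False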
1,034 changes: 1,034 additions & 0 deletions Practical Reinforcement Learning/Week4_approx/dqn_atari.ipynb

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions Practical Reinforcement Learning/Week4_approx/framebuffer.py
@@ -0,0 +1,41 @@
import numpy as np
from gym.spaces.box import Box
from gym.core import Wrapper


class FrameBuffer(Wrapper):
    def __init__(self, env, n_frames=4, dim_order='tensorflow'):
        """A gym wrapper that stacks the last n_frames observations along the channel axis."""
        super(FrameBuffer, self).__init__(env)
        self.dim_order = dim_order
        if dim_order == 'tensorflow':
            height, width, n_channels = env.observation_space.shape
            obs_shape = [height, width, n_channels * n_frames]
        elif dim_order == 'pytorch':
            n_channels, height, width = env.observation_space.shape
            obs_shape = [n_channels * n_frames, height, width]
        else:
            raise ValueError('dim_order should be "tensorflow" or "pytorch", got {}'.format(dim_order))
        self.observation_space = Box(0.0, 1.0, obs_shape)
        self.framebuffer = np.zeros(obs_shape, 'float32')

    def reset(self):
        """Resets the environment, returns the initial frame buffer."""
        self.framebuffer = np.zeros_like(self.framebuffer)
        self.update_buffer(self.env.reset())
        return self.framebuffer

    def step(self, action):
        """Plays the environment for 1 step, returns the updated frame buffer."""
        new_img, reward, done, info = self.env.step(action)
        self.update_buffer(new_img)
        return self.framebuffer, reward, done, info

    def update_buffer(self, img):
        # Prepend the newest frame and drop the oldest one along the channel axis.
        if self.dim_order == 'tensorflow':
            offset = self.env.observation_space.shape[-1]
            axis = -1
            cropped_framebuffer = self.framebuffer[:, :, :-offset]
        elif self.dim_order == 'pytorch':
            offset = self.env.observation_space.shape[0]
            axis = 0
            cropped_framebuffer = self.framebuffer[:-offset]
        self.framebuffer = np.concatenate([img, cropped_framebuffer], axis=axis)
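
For reference, a brief sketch (assumed, not part of the commit) of what FrameBuffer does to observation shapes; the env id is illustrative.

# Hypothetical sketch: FrameBuffer stacks the last n_frames observations
# along the channel axis, with the newest frame first.
import gym

raw_env = gym.make("BreakoutDeterministic-v4")   # illustrative env id
env = FrameBuffer(raw_env, n_frames=4, dim_order='tensorflow')

print(raw_env.observation_space.shape)  # (210, 160, 3): one raw RGB frame
print(env.observation_space.shape)      # (210, 160, 12): 4 stacked frames
obs = env.reset()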
63 changes: 63 additions & 0 deletions Practical Reinforcement Learning/Week4_approx/replay_buffer.py
@@ -0,0 +1,63 @@
# This code is shamelessly stolen from https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py
import numpy as np
import random


class ReplayBuffer(object):
    def __init__(self, size):
        """Create Replay buffer.
        Parameters
        ----------
        size: int
            Max number of transitions to store in the buffer. When the buffer
            overflows the old memories are dropped.
        """
        self._storage = []
        self._maxsize = size
        self._next_idx = 0

    def __len__(self):
        return len(self._storage)

    def add(self, obs_t, action, reward, obs_tp1, done):
        data = (obs_t, action, reward, obs_tp1, done)

        if self._next_idx >= len(self._storage):
            self._storage.append(data)
        else:
            self._storage[self._next_idx] = data
        self._next_idx = (self._next_idx + 1) % self._maxsize

    def _encode_sample(self, idxes):
        obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
        for i in idxes:
            data = self._storage[i]
            obs_t, action, reward, obs_tp1, done = data
            obses_t.append(np.array(obs_t, copy=False))
            actions.append(np.array(action, copy=False))
            rewards.append(reward)
            obses_tp1.append(np.array(obs_tp1, copy=False))
            dones.append(done)
        return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones)

    def sample(self, batch_size):
        """Sample a batch of experiences.
        Parameters
        ----------
        batch_size: int
            How many transitions to sample.
        Returns
        -------
        obs_batch: np.array
            batch of observations
        act_batch: np.array
            batch of actions executed given obs_batch
        rew_batch: np.array
            rewards received as results of executing act_batch
        next_obs_batch: np.array
            next set of observations seen after executing act_batch
        done_mask: np.array
            done_mask[i] = 1 if executing act_batch[i] resulted in
            the end of an episode and 0 otherwise.
        """
        idxes = [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
        return self._encode_sample(idxes)
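
A small sketch (assumed, not part of the commit) of the intended replay loop: store transitions while acting, then sample a random minibatch for the Q-learning update. The env id is illustrative.

# Hypothetical usage sketch of ReplayBuffer with a random policy.
import gym

env = gym.make("CartPole-v0")        # illustrative env id
exp_replay = ReplayBuffer(size=10**4)

obs = env.reset()
for _ in range(1000):
    action = env.action_space.sample()
    next_obs, reward, done, _ = env.step(action)
    exp_replay.add(obs, action, reward, next_obs, done)
    obs = env.reset() if done else next_obs

# Sample a minibatch for a (hypothetical) TD update.
obs_b, act_b, rew_b, next_obs_b, done_b = exp_replay.sample(batch_size=32)
print(obs_b.shape, act_b.shape, done_b.shape)  # (32, 4) (32,) (32,)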
21 changes: 21 additions & 0 deletions Practical Reinforcement Learning/Week4_approx/submit.py
@@ -0,0 +1,21 @@
import sys
import numpy as np
sys.path.append("..")
import grading


def submit_cartpole(generate_session, email, token):
    sessions = [generate_session() for _ in range(100)]
    #session_rewards, _, _ = map(np.array, zip(*sessions))
    session_rewards = np.array(sessions)
    grader = grading.Grader("RDofv-QXEeeaGw6kpIOf3g")
    grader.set_answer("NRNkl", int(np.mean(session_rewards)))
    grader.submit(email, token)


def submit_breakout(agent, env, evaluate, email, token):
    sessions = [evaluate(env, agent, n_games=1) for _ in range(100)]
    session_rewards = np.array(sessions)
    grader = grading.Grader("WTOZHCn1EeiNwAoZNi-Hrg")
    grader.set_answer("VFM7Z", int(np.mean(session_rewards)))
    grader.submit(email, token)
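
An assumed example call from the notebook (email and token are placeholders; generate_session is expected to return one episode's total reward, as the grader averages 100 such values).

# Hypothetical call with placeholder credentials.
submit_cartpole(generate_session, "student@example.com", "YOUR_TOKEN")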