-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Practical Reinforcement Learning Week4
- Loading branch information
1 parent
95c4c02
commit e47da9e
Showing
15 changed files
with
1,761 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+381 KB
Practical Reinforcement Learning/Week4_approx/QUIZ SARSA and QLeaning.pdf
Binary file not shown.
59 changes: 59 additions & 0 deletions
59
Practical Reinforcement Learning/Week4_approx/atari_util.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
"""Auxiliary files for those who wanted to solve breakout with CEM or policy gradient""" | ||
import numpy as np | ||
import gym | ||
from scipy.misc import imresize | ||
from gym.core import Wrapper | ||
from gym.spaces.box import Box | ||
|
||
class PreprocessAtari(Wrapper):
    """Gym wrapper that preprocesses frames and keeps a stack of the latest ones.

    Every observation coming from the wrapped environment is cropped, resized,
    optionally averaged over its colour channels, divided by 255 and pushed to
    the front of ``self.framebuffer``. ``reset`` and ``step`` return that
    buffer instead of the raw frame.
    """

    def __init__(self, env, height=42, width=42, color=False, crop=lambda img: img,
                 n_frames=4, dim_order='theano', reward_scale=1,):
        """Set up the preprocessing options and allocate the frame buffer.

        Parameters
        ----------
        env:
            Environment to wrap.
        height, width: int
            Size every frame is resized to.
        color: bool
            Keep the 3 colour channels if true, otherwise average them into one.
        crop: callable
            Applied to the raw image before resizing; identity by default.
        n_frames: int
            Number of most recent frames kept in the buffer.
        dim_order: str
            'theano' for a [channels, height, width] buffer,
            'tensorflow' for [height, width, channels].
        reward_scale: number
            Factor every reward is multiplied by in ``step``.

        Raises
        ------
        ValueError
            If ``dim_order`` is neither 'theano' nor 'tensorflow'.
        """
        super(PreprocessAtari, self).__init__(env)
        # Explicit check instead of `assert`: asserts vanish under `python -O`,
        # and FrameBuffer reports the same kind of mistake with ValueError.
        if dim_order not in ('theano', 'tensorflow'):
            raise ValueError('dim_order should be "theano" or "tensorflow", got {}'.format(dim_order))
        self.img_size = (height, width)
        self.crop = crop
        self.color = color
        self.dim_order = dim_order
        self.reward_scale = reward_scale

        # Each stored frame contributes 3 channels in colour mode, 1 otherwise.
        n_channels = (3 * n_frames) if color else n_frames
        if dim_order == 'theano':
            obs_shape = [n_channels, height, width]
        else:
            obs_shape = [height, width, n_channels]
        self.observation_space = Box(0.0, 1.0, obs_shape)
        self.framebuffer = np.zeros(obs_shape, 'float32')

    def reset(self):
        """Reset the wrapped env, clear the buffer and return it with the first frame pushed."""
        self.framebuffer = np.zeros_like(self.framebuffer)
        self.update_buffer(self.env.reset())
        return self.framebuffer

    def step(self, action):
        """Advance the wrapped env by one step.

        Returns the frame buffer (with the new frame pushed), the reward
        multiplied by ``reward_scale``, and the env's ``done`` and ``info``.
        """
        new_img, reward, done, info = self.env.step(action)
        self.update_buffer(new_img)
        return self.framebuffer, reward * self.reward_scale, done, info

    ### image processing ###

    def update_buffer(self, img):
        """Preprocess ``img`` and push it to the front of the frame buffer.

        The trailing channels of the current buffer (one frame's worth) are
        dropped so the buffer keeps its shape.
        """
        img = self.preproc_image(img)
        # Number of channels occupied by a single preprocessed frame.
        offset = 3 if self.color else 1
        if self.dim_order == 'theano':
            axis = 0
            cropped_framebuffer = self.framebuffer[:-offset]
        else:
            axis = -1
            cropped_framebuffer = self.framebuffer[:, :, :-offset]
        self.framebuffer = np.concatenate([img, cropped_framebuffer], axis=axis)

    def preproc_image(self, img):
        """Crop, resize, optionally grey-scale, reorder and rescale one raw frame."""
        img = self.crop(img)
        # NOTE(review): scipy.misc.imresize is not available in recent SciPy
        # releases; confirm the environment pins a SciPy version that has it.
        img = imresize(img, self.img_size)
        if not self.color:
            # Average over the channel axis but keep it as a size-1 dimension.
            img = img.mean(-1, keepdims=True)
        if self.dim_order == 'theano':
            img = img.transpose([2, 0, 1])  # [h, w, c] to [c, h, w]
        img = img.astype('float32') / 255.
        return img
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1,034 changes: 1,034 additions & 0 deletions
1,034
Practical Reinforcement Learning/Week4_approx/dqn_atari.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 41 additions & 0 deletions
41
Practical Reinforcement Learning/Week4_approx/framebuffer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import numpy as np | ||
from gym.spaces.box import Box | ||
from gym.core import Wrapper | ||
class FrameBuffer(Wrapper):
    """Gym wrapper that returns the most recent observations stacked along the channel axis."""

    def __init__(self, env, n_frames=4, dim_order='tensorflow'):
        """Allocate a zero-filled buffer sized for ``n_frames`` observations of ``env``.

        ``dim_order`` selects where the channel axis of the wrapped env's
        observations lives: last for 'tensorflow', first for 'pytorch'.
        Any other value raises ValueError.
        """
        super(FrameBuffer, self).__init__(env)
        self.dim_order = dim_order
        if dim_order == 'tensorflow':
            rows, cols, channels = env.observation_space.shape
            stacked_shape = [rows, cols, channels * n_frames]
        elif dim_order == 'pytorch':
            channels, rows, cols = env.observation_space.shape
            stacked_shape = [channels * n_frames, rows, cols]
        else:
            raise ValueError('dim_order should be "tensorflow" or "pytorch", got {}'.format(dim_order))
        self.observation_space = Box(0.0, 1.0, stacked_shape)
        self.framebuffer = np.zeros(stacked_shape, 'float32')

    def reset(self):
        """Reset the wrapped env; return a cleared buffer holding only the first observation."""
        self.framebuffer = np.zeros_like(self.framebuffer)
        first_frame = self.env.reset()
        self.update_buffer(first_frame)
        return self.framebuffer

    def step(self, action):
        """Step the wrapped env; return the updated buffer with the env's reward, done and info."""
        frame, reward, done, info = self.env.step(action)
        self.update_buffer(frame)
        return self.framebuffer, reward, done, info

    def update_buffer(self, img):
        """Put ``img`` at the front of the buffer and drop one frame's channels from the end."""
        if self.dim_order == 'tensorflow':
            # Channels are the last axis: trim one frame's worth from its tail.
            stack_axis = -1
            frame_channels = self.env.observation_space.shape[-1]
            older_frames = self.framebuffer[:, :, :-frame_channels]
        elif self.dim_order == 'pytorch':
            # Channels are the first axis.
            stack_axis = 0
            frame_channels = self.env.observation_space.shape[0]
            older_frames = self.framebuffer[:-frame_channels]
        self.framebuffer = np.concatenate([img, older_frames], axis=stack_axis)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
543 changes: 543 additions & 0 deletions
543
Practical Reinforcement Learning/Week4_approx/practice_approx_qlearning.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
63 changes: 63 additions & 0 deletions
63
Practical Reinforcement Learning/Week4_approx/replay_buffer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# This code is shamelessly stolen from https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py | ||
import numpy as np | ||
import random | ||
|
||
class ReplayBuffer(object):
    """Fixed-capacity store of (obs_t, action, reward, obs_tp1, done) transitions.

    Once ``size`` transitions have been added, each new one overwrites the
    oldest remaining entry.
    """

    def __init__(self, size):
        """Create Replay buffer.

        Parameters
        ----------
        size: int
            Max number of transitions to store in the buffer. When the buffer
            overflows the old memories are dropped.
        """
        self._storage = []
        self._maxsize = size
        # Slot the next transition is written to; wraps around at _maxsize.
        self._next_idx = 0

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self._storage)

    def add(self, obs_t, action, reward, obs_tp1, done):
        """Store one transition, overwriting the oldest one when the buffer is full."""
        data = (obs_t, action, reward, obs_tp1, done)

        if self._next_idx >= len(self._storage):
            # Still filling up: grow the list.
            self._storage.append(data)
        else:
            self._storage[self._next_idx] = data
        self._next_idx = (self._next_idx + 1) % self._maxsize

    def _encode_sample(self, idxes):
        """Gather the transitions at ``idxes`` into five stacked arrays.

        Returns (obses_t, actions, rewards, obses_tp1, dones), each an
        ``np.array`` whose first axis follows the order of ``idxes``.
        """
        obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
        for i in idxes:
            obs_t, action, reward, obs_tp1, done = self._storage[i]
            # np.asarray reuses an existing ndarray and copies only when it has
            # to. The former np.array(..., copy=False) raises ValueError on
            # NumPy >= 2.0 whenever a copy is unavoidable (lists, Python
            # scalars).
            obses_t.append(np.asarray(obs_t))
            actions.append(np.asarray(action))
            rewards.append(reward)
            obses_tp1.append(np.asarray(obs_tp1))
            dones.append(done)
        return np.array(obses_t), np.array(actions), np.array(rewards), np.array(obses_tp1), np.array(dones)

    def sample(self, batch_size):
        """Sample a batch of experiences.

        Indices are drawn uniformly with replacement from the stored
        transitions.

        Parameters
        ----------
        batch_size: int
            How many transitions to sample.

        Returns
        -------
        obs_batch: np.array
            batch of observations
        act_batch: np.array
            batch of actions executed given obs_batch
        rew_batch: np.array
            rewards received as results of executing act_batch
        next_obs_batch: np.array
            next set of observations seen after executing act_batch
        done_mask: np.array
            done_mask[i] = 1 if executing act_batch[i] resulted in
            the end of an episode and 0 otherwise.
        """
        idxes = [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
        return self._encode_sample(idxes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import sys | ||
import numpy as np | ||
sys.path.append("..") | ||
import grading | ||
|
||
|
||
def submit_cartpole(generate_session, email, token):
    """Run ``generate_session`` 100 times and submit the integer-truncated mean result.

    ``email`` and ``token`` are passed straight to the grader's ``submit``.
    """
    # presumably generate_session returns a single session reward; verify against caller
    session_rewards = np.array([generate_session() for _ in range(100)])
    grader = grading.Grader("RDofv-QXEeeaGw6kpIOf3g")
    mean_reward = int(np.mean(session_rewards))
    grader.set_answer("NRNkl", mean_reward)
    grader.submit(email, token)
|
||
|
||
def submit_breakout(agent, env, evaluate, email, token):
    """Call ``evaluate(env, agent, n_games=1)`` 100 times and submit the integer-truncated mean.

    ``email`` and ``token`` are passed straight to the grader's ``submit``.
    """
    session_rewards = np.array([evaluate(env, agent, n_games=1) for _ in range(100)])
    grader = grading.Grader("WTOZHCn1EeiNwAoZNi-Hrg")
    mean_reward = int(np.mean(session_rewards))
    grader.set_answer("VFM7Z", mean_reward)
    grader.submit(email, token)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.