
Commit 631850c

[UPDATE] Init convolutional model. Memory leak issues cause training to be impossible beyond 600 episodes.

derektan95 committed Jan 31, 2022
1 parent 7225705 commit 631850c
Showing 4 changed files with 299 additions and 117 deletions.
175 changes: 106 additions & 69 deletions p1_navigation/Pixel_Based_Navigation/Navigation_Pixels.ipynb

Large diffs are not rendered by default.

61 changes: 45 additions & 16 deletions p1_navigation/Pixel_Based_Navigation/dqn_agent.py
@@ -8,14 +8,17 @@
import torch.nn.functional as F
import torch.optim as optim

BUFFER_SIZE = int(1e5) # replay buffer size
BATCH_SIZE = 64 # minibatch size
GAMMA = 0.99 # discount factor
TAU = 1e-3 # for soft update of target parameters
LR = 5e-4 # learning rate
UPDATE_EVERY = 4 # how often to update the network
BUFFER_SIZE = int(1e6) # replay buffer size
BATCH_SIZE = 64 # minibatch size
GAMMA = 0.99 # discount factor
TAU = 1e-3 # for soft update of target parameters
LR = 5e-4 # learning rate
LEARN_EVERY = 4 # how often to update the LOCAL network
UPDATE_TARGET_WEIGHTS_EVERY = 500 # how often to update the TARGET network


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

class Agent():
"""Interacts with and learns from the environment."""
@@ -40,16 +43,35 @@ def __init__(self, state_size, action_size, seed):

# Replay memory
self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
# Initialize time step (for updating every UPDATE_EVERY steps)
self.t_step = 0

# Initialize time step
self.learn_t_step = 0 # for updating every LEARN_EVERY steps
self.soft_update_t_step = 0 # for updating every UPDATE_TARGET_WEIGHTS_EVERY steps


## Print networks
print("network_local", self.qnetwork_local)
print("network_target", self.qnetwork_target)
print("optimizer", self.optimizer)

# Print Hyper-parameters
print("BUFFER_SIZE: ", BUFFER_SIZE)
print("BATCH_SIZE: ", BATCH_SIZE)
print("GAMMA: ", GAMMA)
print("TAU: ", TAU)
print("LR: ", LR)
print("LEARN_EVERY: ", LEARN_EVERY)
print("UPDATE_TARGET_WEIGHTS_EVERY: ", UPDATE_TARGET_WEIGHTS_EVERY)

def step(self, state, action, reward, next_state, done):
# Save experience in replay memory
self.memory.add(state, action, reward, next_state, done)

# Learn every UPDATE_EVERY time steps.
self.t_step = (self.t_step + 1) % UPDATE_EVERY
if self.t_step == 0:
# Learn every LEARN_EVERY time steps.
self.learn_t_step = (self.learn_t_step + 1) % LEARN_EVERY
self.soft_update_t_step = (self.soft_update_t_step + 1) % UPDATE_TARGET_WEIGHTS_EVERY

if self.learn_t_step == 0:
# If enough samples are available in memory, get random subset and learn
if len(self.memory) > BATCH_SIZE:
experiences = self.memory.sample()
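
For context, here is a hypothetical gym-style driver loop showing where step() sits; the real notebook drives a UnityEnvironment, so env, agent, and n_episodes below are placeholder names. The loop makes the new cadence explicit: the local network learns from a minibatch every LEARN_EVERY steps, while the target network is only refreshed every UPDATE_TARGET_WEIGHTS_EVERY steps.

eps = 1.0
for episode in range(n_episodes):                             # n_episodes: placeholder
    state = env.reset()                                       # env: placeholder gym-style environment
    done = False
    while not done:
        action = agent.act(state, eps)                        # epsilon-greedy over qnetwork_local
        next_state, reward, done, _ = env.step(action)
        agent.step(state, action, reward, next_state, done)   # store experience, then maybe learn / update target
        state = next_state
    eps = max(0.01, 0.995 * eps)                              # decay exploration over episodes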
@@ -64,6 +86,7 @@ def act(self, state, eps=0.):
eps (float): epsilon, for epsilon-greedy action selection
"""
state = torch.from_numpy(state).float().unsqueeze(0).to(device)

self.qnetwork_local.eval() # .eval() == (self.training=false)
with torch.no_grad():
action_values = self.qnetwork_local(state) # INFERENCE: NO NEED TO UPDATE WEIGHTS / BIASES VIA BACKPROP
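
The remainder of act() is not part of this hunk; it typically ends with epsilon-greedy selection over action_values. A self-contained sketch of that step (a conventional implementation, not necessarily the exact code in this file):

import random
import numpy as np
import torch

def epsilon_greedy(action_values: torch.Tensor, action_size: int, eps: float) -> int:
    """Return the greedy action with probability 1 - eps, otherwise a uniform random action."""
    if random.random() > eps:
        return int(np.argmax(action_values.cpu().data.numpy()))
    return int(random.choice(np.arange(action_size)))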
@@ -87,6 +110,11 @@ def learn(self, experiences, gamma):

## TODO: compute and minimize the loss
"*** YOUR CODE HERE ***"

# # For greyscale, unsqueeze the 1 dimension that is lost in the process
# next_states = torch.unsqueeze(next_states, 1)
# states = torch.unsqueeze(states, 1)
# print("next_states: ", next_states.shape)

# Target actions from stable Fixed Target-Q Neural Network
# Detach since no need to update weights & biases param in Target Network - They are cloned from qnetwork_local
@@ -98,13 +126,14 @@ def learn(self, experiences, gamma):
Q_expecteds = Q_expecteds_arr[torch.arange(Q_expecteds_arr.shape[0]).long(), actions.squeeze().long()].unsqueeze(1)

# Compute & minimize the loss
loss = F.mse_loss(Q_expecteds, Q_targets)
self.optimizer.zero_grad() # Zero out all of the gradients for the variables which the optimizer will update
loss.backward() # Compute the gradient of the loss wrt each parameter of the model.
self.optimizer.step() # Actually update the parameters of the model using the gradients computed by the backwards pass.
loss = F.mse_loss(Q_expecteds, Q_targets) # Mean-Squared Error loss across mini-batch of experiences relative to targets array
self.optimizer.zero_grad() # Zero out all of the gradients for the variables which the optimizer will update
loss.backward() # Compute the gradient of the loss wrt each parameter of the model.
self.optimizer.step() # Actually update the parameters of the model using the gradients computed by the backwards pass.

# ------------------- update target network ------------------- #
self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)
if self.soft_update_t_step == 0:
self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)

def soft_update(self, local_model, target_model, tau):
"""Soft update model parameters.
155 changes: 127 additions & 28 deletions p1_navigation/Pixel_Based_Navigation/model.py
@@ -3,39 +3,138 @@
import torch.nn.functional as F
from collections import OrderedDict


class QNetwork(nn.Module):
"""Actor (Policy) Model."""

def __init__(self, state_size, action_size, seed, channel_1=4, channel_2=4):
"""Initialize parameters and build model.
Params
======
state_size (int): Dimension of each state
action_size (int): Dimension of each action
seed (int): Random seed
fc1_units (int): Number of nodes in first hidden layer
fc2_units (int): Number of nodes in second hidden layer
"""
# Needed to inherit functionalities from nn.Module
"""QNetwork.
Simple Dense neural network
to serve as funcction approximator.
"""
def __init__(
self,
state_size,
action_size,
seed,
in_channels=3, # DEFAULT: 3
conv1_kernel=3,
conv1_filters=16,
conv1_strides=1,
conv2_kernel=3,
conv2_filters=32,
conv2_strides=1,
conv3_kernel=3,
conv3_filters=64,
conv3_strides=1,
fc1_units=512,
fc2_units=512,
fc3_units=256
):
# super(QNetwork, self).__init__()
super().__init__()
self.seed = seed
self.network = nn.Sequential(
nn.Conv2d(in_channels, conv1_filters, kernel_size=conv1_kernel, stride=conv1_strides, padding=1),
nn.BatchNorm2d(conv1_filters),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),

nn.Conv2d(conv1_filters, conv2_filters, kernel_size=conv2_kernel, stride=conv2_strides, padding=1),
nn.BatchNorm2d(conv2_filters),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),

nn.Flatten(),
nn.Linear(conv2_filters * 21 * 21, fc1_units),
nn.BatchNorm1d(fc1_units),
nn.ReLU(),
nn.Linear(fc1_units, fc2_units),
nn.BatchNorm1d(fc2_units),
nn.ReLU(),
nn.Linear(fc2_units, action_size)
)

def forward(self, x):

x = x.squeeze()

# For single inputs (act)
if x.dim() == 3:
x = torch.unsqueeze(x, 0)

x = torch.permute(x, (0, 3, 1, 2))
return self.network(x)
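
Shape check for the Flatten -> Linear boundary: the 3x3, stride-1, padding-1 convolutions preserve spatial size and each MaxPool2d(2, 2) halves it, so an 84x84 input becomes 42x42 and then 21x21, matching nn.Linear(conv2_filters * 21 * 21, fc1_units) = 32 * 441 = 14112 input features. A minimal sanity check under that assumed 84x84x3 input (the BatchNorm layers need eval() for a single sample):

import torch
from model import QNetwork          # assumes the working directory contains this model.py

net = QNetwork(state_size=(1, 84, 84, 3), action_size=4, seed=0)
net.eval()                          # use running stats so a batch of one passes BatchNorm
dummy = torch.rand(1, 1, 84, 84, 3) # (N, 1, H, W, C), shaped like the tensor act() builds
print(net(dummy).shape)             # expected: torch.Size([1, 4])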


###################################################################

# class QNetwork(nn.Module):
# """Actor (Policy) Model."""

# def __init__(self, state_size, action_size, seed, in_channels=3, channel_1=32, channel_2=64, channel_3=64, linear_1=512):
# """Initialize parameters and build model.
# Params
# ======
# state_size (int): Dimension of each state
# action_size (int): Dimension of each action
# seed (int): Random seed
# fc1_units (int): Number of nodes in first hidden layer
# fc2_units (int): Number of nodes in second hidden layer
# """
# # Needed to inherit functionalities from nn.Module
# # super(QNetwork, self).__init__()
# super().__init__()

# # NOTE: Following https://storage.googleapis.com/deepmind-media/dqn/DQNNaturePaper.pdf
# # Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0 ...)
# self.seed = torch.manual_seed(seed)

# self.network = nn.Sequential(
# nn.Conv2d(in_channels, channel_1, kernel_size=8, stride=4, padding=0),
# nn.BatchNorm2d(channel_1),
# nn.ReLU(),
# # nn.MaxPool2d(kernel_size=2, stride=2),

# nn.Conv2d(channel_1, channel_2, kernel_size=4, stride=2, padding=0),
# nn.BatchNorm2d(channel_2),
# nn.ReLU(),
# # nn.MaxPool2d(kernel_size=2, stride=2),

# nn.Conv2d(channel_2, channel_3, kernel_size=3, stride=1, padding=0),
# nn.BatchNorm2d(channel_3),
# nn.ReLU(),
# # nn.MaxPool2d(kernel_size=2, stride=2),

# nn.Flatten(),
# nn.Linear(3136, linear_1),
# nn.BatchNorm1d(linear_1),
# nn.ReLU(),
# nn.Linear(linear_1, action_size),
# # nn.BatchNorm1d(action_size),
# # nn.ReLU(),
# # nn.Linear(fc2_units, action_size)
# )

# Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0 ...)
self.seed = torch.manual_seed(seed)
self.conv1 = nn.Conv2d(state_size[3], channel_1, 5, stride=1, padding=2)
self.conv2 = nn.Conv2d(channel_1, channel_2, 3, stride=1, padding=1)
self.fc1 = nn.Linear(channel_2 * state_size[1] * state_size[2], action_size)

# # self.conv1 = nn.Conv2d(state_size[3], channel_1, kernel_size=8, stride=4, padding=0)
# # self.batchnorm1 = nn.BatchNorm2d(channel_1)
# # self.conv2 = nn.Conv2d(channel_1, channel_2, kernel_size=4, stride=2, padding=0)
# # self.batchnorm2 = nn.BatchNorm2d(channel_2)
# # self.conv3 = nn.Conv2d(channel_2, channel_3, kernel_size=3, stride=1, padding=0)
# # self.batchnorm3 = nn.BatchNorm2d(channel_3)
# # self.fc1 = nn.Linear(5184, 512)
# # self.bn1 = nn.BatchNorm1d(fc1_units)
# # self.fc2 = nn.Linear(512, action_size)

def forward(self, state):
# def forward(self, state):

# PERMUTE DIMs: (N, H, W, C) --> (N, C, H, W)
# NOTE: Some inputs are 4D, some are 3D (I.e. from Learn method)
state = torch.unsqueeze(state[0].squeeze(), 0)
state = torch.permute(state, (0, 3, 1, 2))
# # PERMUTE DIMs: (N, H, W, C) --> (N, C, H, W)
# # NOTE: Some inputs are 4D, some are 3D (I.e. from Learn method)
# state = torch.unsqueeze(state[0].squeeze(), 0)
# state = torch.permute(state, (0, 3, 1, 2))
# return self.network(state)

"""Build a network that maps state -> action values."""
conv1_relu_out = F.relu(self.conv1(state))
conv2_relu_out = F.relu(self.conv2(conv1_relu_out))
return self.fc1(conv2_relu_out.flatten(1, -1))
# # """Build a network that maps state -> action values."""
# # conv1_relu_out = F.relu(self.conv1(state))
# # conv2_relu_out = F.relu(self.conv2(conv1_relu_out))
# # conv3_relu_out = F.relu(self.conv3(conv2_relu_out))
# # linear1_out = self.fc1(conv2_relu_out.flatten(1, -1))
# # return self.fc2(linear1_out)
25 changes: 21 additions & 4 deletions p1_navigation/Ray_Based_Navigation/Navigation.ipynb
@@ -17,19 +17,36 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cuda:0\n"
]
}
],
"source": [
"from unityagents import UnityEnvironment\n",
"\n",
