
Commit 631850c

[UPDATE] Init convolutional model. Memory leak issues cause training to be impossible beyond 600 episodes.

derektan95 committed Jan 31, 2022
1 parent 7225705 commit 631850c
Showing 4 changed files with 299 additions and 117 deletions.
175 changes: 106 additions & 69 deletions p1_navigation/Pixel_Based_Navigation/Navigation_Pixels.ipynb

Large diffs are not rendered by default.

61 changes: 45 additions & 16 deletions p1_navigation/Pixel_Based_Navigation/dqn_agent.py
@@ -8,14 +8,17 @@
import torch.nn.functional as F
import torch.optim as optim

BUFFER_SIZE = int(1e5) # replay buffer size
BATCH_SIZE = 64 # minibatch size
GAMMA = 0.99 # discount factor
TAU = 1e-3 # for soft update of target parameters
LR = 5e-4 # learning rate
UPDATE_EVERY = 4 # how often to update the network
BUFFER_SIZE = int(1e6) # replay buffer size
BATCH_SIZE = 64 # minibatch size
GAMMA = 0.99 # discount factor
TAU = 1e-3 # for soft update of target parameters
LR = 5e-4 # learning rate
LEARN_EVERY = 4 # how often to update the LOCAL network
UPDATE_TARGET_WEIGHTS_EVERY = 500 # how often to update the TARGET network


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

class Agent():
"""Interacts with and learns from the environment."""
@@ -40,16 +43,35 @@ def __init__(self, state_size, action_size, seed):

# Replay memory
self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
# Initialize time step (for updating every UPDATE_EVERY steps)
self.t_step = 0

# Initialize time step
self.learn_t_step = 0 # for updating every LEARN_EVERY steps
self.soft_update_t_step = 0 # for updating every UPDATE_TARGET_WEIGHTS_EVERY steps


## Print networks
print("network_local", self.qnetwork_local)
print("network_target", self.qnetwork_target)
print("optimizer", self.optimizer)

# Print Hyper-parameters
print("BUFFER_SIZE: ", BUFFER_SIZE)
print("BATCH_SIZE: ", BATCH_SIZE)
print("GAMMA: ", GAMMA)
print("TAU: ", TAU)
print("LR: ", LR)
print("LEARN_EVERY: ", LEARN_EVERY)
print("UPDATE_TARGET_WEIGHTS_EVERY: ", UPDATE_TARGET_WEIGHTS_EVERY)

def step(self, state, action, reward, next_state, done):
# Save experience in replay memory
self.memory.add(state, action, reward, next_state, done)

# Learn every UPDATE_EVERY time steps.
self.t_step = (self.t_step + 1) % UPDATE_EVERY
if self.t_step == 0:
# Learn every LEARN_EVERY time steps.
self.learn_t_step = (self.learn_t_step + 1) % LEARN_EVERY
self.soft_update_t_step = (self.soft_update_t_step + 1) % UPDATE_TARGET_WEIGHTS_EVERY

if self.learn_t_step == 0:
# If enough samples are available in memory, get random subset and learn
if len(self.memory) > BATCH_SIZE:
experiences = self.memory.sample()
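
For context, here is a hypothetical gym-style driver loop showing where step() sits; the real notebook drives a UnityEnvironment, so env, agent, and n_episodes below are placeholder names. The loop makes the new cadence explicit: the local network learns from a minibatch every LEARN_EVERY steps, while the target network is only refreshed every UPDATE_TARGET_WEIGHTS_EVERY steps.

eps = 1.0
for episode in range(n_episodes):                             # n_episodes: placeholder
    state = env.reset()                                       # env: placeholder gym-style environment
    done = False
    while not done:
        action = agent.act(state, eps)                        # epsilon-greedy over qnetwork_local
        next_state, reward, done, _ = env.step(action)
        agent.step(state, action, reward, next_state, done)   # store experience, then maybe learn / update target
        state = next_state
    eps = max(0.01, 0.995 * eps)                              # decay exploration over episodes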
@@ -64,6 +86,7 @@ def act(self, state, eps=0.):
eps (float): epsilon, for epsilon-greedy action selection
"""
state = torch.from_numpy(state).float().unsqueeze(0).to(device)

self.qnetwork_local.eval() # .eval() == (self.training=false)
with torch.no_grad():
action_values = self.qnetwork_local(state) # INFERENCE: NO NEED TO UPDATE WEIGHTS / BIASES VIA BACKPROP
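
The remainder of act() is not part of this hunk; it typically ends with epsilon-greedy selection over action_values. A self-contained sketch of that step (a conventional implementation, not necessarily the exact code in this file):

import random
import numpy as np
import torch

def epsilon_greedy(action_values: torch.Tensor, action_size: int, eps: float) -> int:
    """Return the greedy action with probability 1 - eps, otherwise a uniform random action."""
    if random.random() > eps:
        return int(np.argmax(action_values.cpu().data.numpy()))
    return int(random.choice(np.arange(action_size)))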
@@ -87,6 +110,11 @@ def learn(self, experiences, gamma):

## TODO: compute and minimize the loss
"*** YOUR CODE HERE ***"

# # For greyscale, unsqueeze the 1 dimension that is lost in the process
# next_states = torch.unsqueeze(next_states, 1)
# states = torch.unsqueeze(states, 1)
# print("next_states: ", next_states.shape)

# Target actions from stable Fixed Target-Q Neural Network
# Detach since no need to update weights & biases param in Target Network - They are cloned from qnetwork_local
@@ -98,13 +126,14 @@ def learn(self, experiences, gamma):
Q_expecteds = Q_expecteds_arr[torch.arange(Q_expecteds_arr.shape[0]).long(), actions.squeeze().long()].unsqueeze(1)

# Compute & minimize the loss
loss = F.mse_loss(Q_expecteds, Q_targets)
self.optimizer.zero_grad() # Zero out all of the gradients for the variables which the optimizer will update
loss.backward() # Compute the gradient of the loss wrt each parameter of the model.
self.optimizer.step() # Actually update the parameters of the model using the gradients computed by the backwards pass.
loss = F.mse_loss(Q_expecteds, Q_targets) # Mean-Squared Error loss across mini-batch of experiences relative to targets array
self.optimizer.zero_grad() # Zero out all of the gradients for the variables which the optimizer will update
loss.backward() # Compute the gradient of the loss wrt each parameter of the model.
self.optimizer.step() # Actually update the parameters of the model using the gradients computed by the backwards pass.

# ------------------- update target network ------------------- #
self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)
if self.soft_update_t_step == 0:
self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)

def soft_update(self, local_model, target_model, tau):
"""Soft update model parameters.
155 changes: 127 additions & 28 deletions p1_navigation/Pixel_Based_Navigation/model.py
@@ -3,39 +3,138 @@
import torch.nn.functional as F
from collections import OrderedDict


class QNetwork(nn.Module):
"""Actor (Policy) Model."""

def __init__(self, state_size, action_size, seed, channel_1=4, channel_2=4):
"""Initialize parameters and build model.
Params
======
state_size (int): Dimension of each state
action_size (int): Dimension of each action
seed (int): Random seed
fc1_units (int): Number of nodes in first hidden layer
fc2_units (int): Number of nodes in second hidden layer
"""
# Needed to inherit functionalities from nn.Module
"""QNetwork.
Simple Dense neural network
to serve as funcction approximator.
"""
def __init__(
self,
state_size,
action_size,
seed,
in_channels=3, # DEFAULT: 3
conv1_kernel=3,
conv1_filters=16,
conv1_strides=1,
conv2_kernel=3,
conv2_filters=32,
conv2_strides=1,
conv3_kernel=3,
conv3_filters=64,
conv3_strides=1,
fc1_units=512,
fc2_units=512,
fc3_units=256
):
# super(QNetwork, self).__init__()
super().__init__()
self.seed = seed
self.network = nn.Sequential(
nn.Conv2d(in_channels, conv1_filters, kernel_size=conv1_kernel, stride=conv1_strides, padding=1),
nn.BatchNorm2d(conv1_filters),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),

nn.Conv2d(conv1_filters, conv2_filters, kernel_size=conv2_kernel, stride=conv2_strides, padding=1),
nn.BatchNorm2d(conv2_filters),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),

nn.Flatten(),
nn.Linear(conv2_filters * 21 * 21, fc1_units),
nn.BatchNorm1d(fc1_units),
nn.ReLU(),
nn.Linear(fc1_units, fc2_units),
nn.BatchNorm1d(fc2_units),
nn.ReLU(),
nn.Linear(fc2_units, action_size)
)

def forward(self, x):

x = x.squeeze()

# For single inputs (act)
if x.dim() == 3:
x = torch.unsqueeze(x, 0)

x = torch.permute(x, (0, 3, 1, 2))
return self.network(x)
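
Shape check for the Flatten -> Linear boundary: the 3x3, stride-1, padding-1 convolutions preserve spatial size and each MaxPool2d(2, 2) halves it, so an 84x84 input becomes 42x42 and then 21x21, matching nn.Linear(conv2_filters * 21 * 21, fc1_units) = 32 * 441 = 14112 input features. A minimal sanity check under that assumed 84x84x3 input (the BatchNorm layers need eval() for a single sample):

import torch
from model import QNetwork          # assumes the working directory contains this model.py

net = QNetwork(state_size=(1, 84, 84, 3), action_size=4, seed=0)
net.eval()                          # use running stats so a batch of one passes BatchNorm
dummy = torch.rand(1, 1, 84, 84, 3) # (N, 1, H, W, C), shaped like the tensor act() builds
print(net(dummy).shape)             # expected: torch.Size([1, 4])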


###################################################################

# class QNetwork(nn.Module):
# """Actor (Policy) Model."""

# def __init__(self, state_size, action_size, seed, in_channels=3, channel_1=32, channel_2=64, channel_3=64, linear_1=512):
# """Initialize parameters and build model.
# Params
# ======
# state_size (int): Dimension of each state
# action_size (int): Dimension of each action
# seed (int): Random seed
# fc1_units (int): Number of nodes in first hidden layer
# fc2_units (int): Number of nodes in second hidden layer
# """
# # Needed to inherit functionalities from nn.Module
# # super(QNetwork, self).__init__()
# super().__init__()

# # NOTE: Following https://storage.googleapis.com/deepmind-media/dqn/DQNNaturePaper.pdf
# # Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0 ...)
# self.seed = torch.manual_seed(seed)

# self.network = nn.Sequential(
# nn.Conv2d(in_channels, channel_1, kernel_size=8, stride=4, padding=0),
# nn.BatchNorm2d(channel_1),
# nn.ReLU(),
# # nn.MaxPool2d(kernel_size=2, stride=2),

# nn.Conv2d(channel_1, channel_2, kernel_size=4, stride=2, padding=0),
# nn.BatchNorm2d(channel_2),
# nn.ReLU(),
# # nn.MaxPool2d(kernel_size=2, stride=2),

# nn.Conv2d(channel_2, channel_3, kernel_size=3, stride=1, padding=0),
# nn.BatchNorm2d(channel_3),
# nn.ReLU(),
# # nn.MaxPool2d(kernel_size=2, stride=2),

# nn.Flatten(),
# nn.Linear(3136, linear_1),
# nn.BatchNorm1d(linear_1),
# nn.ReLU(),
# nn.Linear(linear_1, action_size),
# # nn.BatchNorm1d(action_size),
# # nn.ReLU(),
# # nn.Linear(fc2_units, action_size)
# )

# Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0 ...)
self.seed = torch.manual_seed(seed)
self.conv1 = nn.Conv2d(state_size[3], channel_1, 5, stride=1, padding=2)
self.conv2 = nn.Conv2d(channel_1, channel_2, 3, stride=1, padding=1)
self.fc1 = nn.Linear(channel_2 * state_size[1] * state_size[2], action_size)

# # self.conv1 = nn.Conv2d(state_size[3], channel_1, kernel_size=8, stride=4, padding=0)
# # self.batchnorm1 = nn.BatchNorm2d(channel_1)
# # self.conv2 = nn.Conv2d(channel_1, channel_2, kernel_size=4, stride=2, padding=0)
# # self.batchnorm2 = nn.BatchNorm2d(channel_2)
# # self.conv3 = nn.Conv2d(channel_2, channel_3, kernel_size=3, stride=1, padding=0)
# # self.batchnorm3 = nn.BatchNorm2d(channel_3)
# # self.fc1 = nn.Linear(5184, 512)
# # self.bn1 = nn.BatchNorm1d(fc1_units)
# # self.fc2 = nn.Linear(512, action_size)

def forward(self, state):
# def forward(self, state):

# PERMUTE DIMs: (N, H, W, C) --> (N, C, H, W)
# NOTE: Some inputs are 4D, some are 3D (I.e. from Learn method)
state = torch.unsqueeze(state[0].squeeze(), 0)
state = torch.permute(state, (0, 3, 1, 2))
# # PERMUTE DIMs: (N, H, W, C) --> (N, C, H, W)
# # NOTE: Some inputs are 4D, some are 3D (I.e. from Learn method)
# state = torch.unsqueeze(state[0].squeeze(), 0)
# state = torch.permute(state, (0, 3, 1, 2))
# return self.network(state)

"""Build a network that maps state -> action values."""
conv1_relu_out = F.relu(self.conv1(state))
conv2_relu_out = F.relu(self.conv2(conv1_relu_out))
return self.fc1(conv2_relu_out.flatten(1, -1))
# # """Build a network that maps state -> action values."""
# # conv1_relu_out = F.relu(self.conv1(state))
# # conv2_relu_out = F.relu(self.conv2(conv1_relu_out))
# # conv3_relu_out = F.relu(self.conv3(conv2_relu_out))
# # linear1_out = self.fc1(conv2_relu_out.flatten(1, -1))
# # return self.fc2(linear1_out)
25 changes: 21 additions & 4 deletions p1_navigation/Ray_Based_Navigation/Navigation.ipynb
@@ -17,19 +17,36 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cuda:0\n"
]
}
],
"source": [
"from unityagents import UnityEnvironment\n",
"\n",
