diff --git a/games/atari.py b/games/atari.py index 56461328..568a2f60 100644 --- a/games/atari.py +++ b/games/atari.py @@ -84,7 +84,7 @@ def __init__(self): self.batch_size = 1024 # Number of parts of games to train on at each training step self.checkpoint_interval = int(1e3) # Number of training steps before using the model for self-playing self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "SGD" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/breakout.py b/games/breakout.py index 26bbfc70..b8556efe 100644 --- a/games/breakout.py +++ b/games/breakout.py @@ -84,7 +84,7 @@ def __init__(self): self.batch_size = 16 # Number of parts of games to train on at each training step self.checkpoint_interval = 500 # Number of training steps before using the model for self-playing self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/cartpole.py b/games/cartpole.py index 0a3aab93..767af700 100644 --- a/games/cartpole.py +++ b/games/cartpole.py @@ -79,7 +79,7 @@ def __init__(self): self.batch_size = 128 # Number of parts of games to train on at each training step self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 1 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/connect4.py b/games/connect4.py index d7cfcd9e..4408261e 100644 --- a/games/connect4.py +++ b/games/connect4.py @@ -78,7 +78,7 @@ def __init__(self): self.batch_size = 64 # Number of parts of games to train on at each training step self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/gomoku.py b/games/gomoku.py index 150afa51..4d42a457 100644 --- a/games/gomoku.py +++ b/games/gomoku.py @@ -79,7 +79,7 @@ def __init__(self): self.batch_size = 512 # Number of parts of games to train on at each training step self.checkpoint_interval = 50 # Number of training steps before using the model for self-playing self.value_loss_weight = 1 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/gridworld.py b/games/gridworld.py index dfa47b91..4c4f8ba6 100644 --- a/games/gridworld.py +++ b/games/gridworld.py @@ -84,7 +84,7 @@ def __init__(self): self.batch_size = 128 # Number of parts of games to train on at each training step self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 1 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/lunarlander.py b/games/lunarlander.py index 5e9e6e6e..a3a771be 100644 --- a/games/lunarlander.py +++ b/games/lunarlander.py @@ -79,7 +79,7 @@ def __init__(self): self.batch_size = 64 # Number of parts of games to train on at each training step self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 1 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/simple_grid.py b/games/simple_grid.py index e66c254a..322872ac 100644 --- a/games/simple_grid.py +++ b/games/simple_grid.py @@ -78,7 +78,7 @@ def __init__(self): self.batch_size = 32 # Number of parts of games to train on at each training step self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 1 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/tictactoe.py b/games/tictactoe.py index 4245bd56..d7e24f44 100644 --- a/games/tictactoe.py +++ b/games/tictactoe.py @@ -78,7 +78,7 @@ def __init__(self): self.batch_size = 64 # Number of parts of games to train on at each training step self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "Adam" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/games/twentyone.py b/games/twentyone.py index 7860158e..4a6cd1f4 100644 --- a/games/twentyone.py +++ b/games/twentyone.py @@ -85,7 +85,7 @@ def __init__(self): self.batch_size = 64 # Number of parts of games to train on at each training step self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) - self.train_on_gpu = True if torch.cuda.is_available() else False # Train on GPU if available + self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.optimizer = "SGD" # "Adam" or "SGD". Paper uses SGD self.weight_decay = 1e-4 # L2 weights regularization diff --git a/models.py b/models.py index 4cb09f3a..fd985d65 100644 --- a/models.py +++ b/models.py @@ -219,14 +219,14 @@ def __init__(self, num_channels, stride=1): self.bn2 = torch.nn.BatchNorm2d(num_channels) def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = torch.nn.functional.relu(x) - x = self.conv2(x) - x = self.bn2(x) - x += x - x = torch.nn.functional.relu(x) - return x + out = self.conv1(x) + out = self.bn1(out) + out = torch.nn.functional.relu(out) + out = self.conv2(out) + out = self.bn2(out) + out += x + out = torch.nn.functional.relu(out) + return out # Downsample observations before representation network (See paper appendix Network Architecture)