diff --git a/README.md b/README.md
index 5e9aefe..29b868b 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,8 @@ Example usage:
 model = ConvLSTM(input_dim=channels,
                  hidden_dim=[64, 64, 128],
                  kernel_size=(3, 3),
-                 num_layers=3,
+                 num_layers=3,
+                 dropout=0.1,
                  batch_first=True,
                  bias=True,
                  return_all_layers=False)
diff --git a/convlstm.py b/convlstm.py
index e54a085..ae3f7e5 100644
--- a/convlstm.py
+++ b/convlstm.py
@@ -67,6 +67,7 @@ class ConvLSTM(nn.Module):
         hidden_dim: Number of hidden channels
         kernel_size: Size of kernel in convolutions
         num_layers: Number of LSTM layers stacked on each other
+        dropout: If non-zero, introduces a Dropout layer on the outputs of each ConvLSTM layer except the last layer, with dropout probability equal to dropout. Default: 0
         batch_first: Whether or not dimension 0 is the batch or not
         bias: Bias or no bias in Convolution
         return_all_layers: Return the list of computations for all layers
@@ -86,7 +87,7 @@ class ConvLSTM(nn.Module):
         >> h = last_states[0][0]  # 0 for layer index, 0 for h index
     """
 
-    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers,
+    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers, dropout=0.0,
                  batch_first=False, bias=True, return_all_layers=False):
         super(ConvLSTM, self).__init__()
 
@@ -116,6 +117,7 @@ def __init__(self, input_dim, hidden_dim, kernel_size, num_layers,
                                           bias=self.bias))
 
         self.cell_list = nn.ModuleList(cell_list)
+        self.dropout = nn.Dropout(dropout)
 
     def forward(self, input_tensor, hidden_state=None):
         """
@@ -161,7 +163,10 @@ def forward(self, input_tensor, hidden_state=None):
                 output_inner.append(h)
 
             layer_output = torch.stack(output_inner, dim=1)
-            cur_layer_input = layer_output
+            if layer_idx != self.num_layers - 1:
+                cur_layer_input = self.dropout(layer_output)
+            else:
+                cur_layer_input = layer_output
 
             layer_output_list.append(layer_output)
             last_state_list.append([h, c])
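For reviewers, a minimal smoke test of the patched module (a hypothetical sketch, not part of the diff; it assumes the changes above are applied and that the repo's `convlstm.py` is importable):

```python
# Hypothetical smoke test for this patch -- not part of the diff.
# Assumes convlstm.py with the changes above applied is on the path.
import torch

from convlstm import ConvLSTM

model = ConvLSTM(input_dim=3,
                 hidden_dim=[64, 64, 128],
                 kernel_size=(3, 3),
                 num_layers=3,
                 dropout=0.1,              # new argument added by this patch
                 batch_first=True,
                 bias=True,
                 return_all_layers=False)

x = torch.rand(2, 5, 3, 32, 32)            # (batch, time, channels, height, width)

model.train()                               # dropout is active between stacked layers
layer_outputs, last_states = model(x)
print(layer_outputs[0].shape)               # torch.Size([2, 5, 128, 32, 32])

model.eval()                                # nn.Dropout is the identity in eval mode
with torch.no_grad():
    eval_outputs, _ = model(x)
```

Mirroring `nn.LSTM`, dropout is applied only to the tensor passed as input to the next stacked layer, never to the final layer's output, so `dropout=0.1` with `num_layers=1` is a no-op. Note also that `layer_output_list` collects the pre-dropout activations, so the returned per-layer outputs are unaffected by the new argument.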