forked from modern-fortran/neural-fortran
* First stab at dropout; conflict with base type TODO
* Partial dropout integration
* Test uninitialized dropout layer
* Test dropout state that follows an input layer
* Enable forward pass for dropout; backward pass TODO
* Version bump and add dropout to the features table
* Add dropout to CMake
* Enable preprocessing in fpm.toml (needed with recent versions of fpm)
* Small change in scale implementation
* Integration of backward pass for dropout
* Reduce tolerance in conv2d convergence tests
* Fix bug in dropout scaling
  Co-authored-by: Ricardo Orsi <@ricor07>
* Disable dropout in inference mode (net % predict); TODO enable in net % train
* Set dropout's training mode to true in net % train(); add tests
* WIP dropout tests
* Dropout layers always in training mode, except when predict is called, when they are in inference mode
* Update the layers table
* Ensure the actual dropout rate == requested dropout rate in most cases
* Accumulate the gradient in dropout % backward and flush in network % update
* Guard against bad dropout rate
* Connect the backward pass; expand tests
* Expand tests
* Use the reference scaling in dropout; don't accumulate gradients because it's not needed
* Add dropout to MNIST example; small model changes
* Add reference
* Update print_info dropout
* Update print_info
* Compute scale once in dropout constructor
* dropout % backward() doesn't need input from the previous layer
* Timing info of dropout
---------
Co-authored-by: Vandenplas, Jeremie <[email protected]>
commit 039638d (1 parent: c316ee1)
Showing 16 changed files with 582 additions and 34 deletions.
@@ -0,0 +1,83 @@
module nf_dropout_layer

  !! Dropout layer by Srivastava et al. (2014).
  !!
  !! Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I. and
  !! Salakhutdinov, R., 2014. Dropout: a simple way to prevent neural networks
  !! from overfitting. The Journal of Machine Learning Research, 15(1),
  !! pp.1929-1958.

  use nf_base_layer, only: base_layer

  implicit none

  private
  public :: dropout_layer

  type, extends(base_layer) :: dropout_layer
    !! Concrete implementation of a dropout layer type

    integer :: input_size = 0

    real, allocatable :: output(:)
    real, allocatable :: gradient(:)
    real, allocatable :: mask(:) ! binary mask for dropout

    real :: dropout_rate ! probability of dropping a neuron
    real :: scale ! scale factor to preserve the input sum
    logical :: training = .true. ! set to .false. for inference

  contains

    procedure :: backward
    procedure :: forward
    procedure :: init

  end type dropout_layer

  interface dropout_layer

    module function dropout_layer_cons(rate) result(res)
      !! This function returns the `dropout_layer` instance.
      real, intent(in) :: rate
        !! Dropout rate
      type(dropout_layer) :: res
        !! dropout_layer instance
    end function dropout_layer_cons

  end interface dropout_layer

  interface

    pure module subroutine backward(self, gradient)
      !! Apply the backward pass.
      !! The dropout layer has no weights or biases; this subroutine only
      !! propagates the gradient from the next layer through the dropout mask.
      class(dropout_layer), intent(in out) :: self
        !! Dropout layer instance
      real, intent(in) :: gradient(:)
        !! Gradient from the next layer
    end subroutine backward

    module subroutine forward(self, input)
      !! Propagate forward through the layer.
      !! Calling this subroutine updates the values of a few data components
      !! of `dropout_layer` that are needed for the backward pass.
      class(dropout_layer), intent(in out) :: self
        !! Dropout layer instance
      real, intent(in) :: input(:)
        !! Input from the previous layer
    end subroutine forward

    module subroutine init(self, input_shape)
      !! Initialize the layer data structures.
      !!
      !! This is a deferred procedure from the `base_layer` abstract type.
      class(dropout_layer), intent(in out) :: self
        !! Dropout layer instance
      integer, intent(in) :: input_shape(:)
        !! Shape of the input layer
    end subroutine init

  end interface

end module nf_dropout_layer
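As a usage note (not part of the commit), a minimal sketch that exercises this interface directly, using only the components and type-bound procedures declared above:

program dropout_layer_demo
  ! Minimal driver for the dropout_layer type defined above; assumes the module
  ! is compiled together with its submodule (shown further below).
  use nf_dropout_layer, only: dropout_layer
  implicit none
  type(dropout_layer) :: drop
  real :: x(10)

  drop = dropout_layer(0.5)     ! constructor stores the rate and scale = 1 / (1 - rate)
  call drop % init([size(x)])   ! allocates output, gradient, and mask

  call random_number(x)

  call drop % forward(x)        ! training mode (default): about half the outputs are
                                ! zeroed and the survivors are scaled by 2
  print *, drop % output

  drop % training = .false.
  call drop % forward(x)        ! inference mode: the input passes through unchanged
  print *, drop % output

end program dropout_layer_demo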
@@ -0,0 +1,68 @@
submodule (nf_dropout_layer) nf_dropout_layer_submodule

  use nf_random, only: shuffle
  !! This submodule implements the procedures defined in the
  !! nf_dropout_layer module.

contains

  module function dropout_layer_cons(rate) result(res)
    real, intent(in) :: rate
    type(dropout_layer) :: res
    res % dropout_rate = rate
    res % scale = 1 / (1 - rate)
  end function dropout_layer_cons


  module subroutine init(self, input_shape)
    class(dropout_layer), intent(in out) :: self
    integer, intent(in) :: input_shape(:)

    self % input_size = input_shape(1)

    ! Allocate arrays
    allocate(self % output(self % input_size))
    allocate(self % gradient(self % input_size))
    allocate(self % mask(self % input_size))

    ! Initialize arrays
    self % output = 0
    self % gradient = 0
    self % mask = 1 ! Default mask is all ones (no dropout)

  end subroutine init


  module subroutine forward(self, input)
    class(dropout_layer), intent(in out) :: self
    real, intent(in) :: input(:)

    ! Generate a random mask for dropout in training mode only
    if (self % training) then

      ! Set the first size(mask) * dropout_rate elements to 0, the rest to 1,
      ! and shuffle. Note that the number of zeroed elements rounds down to
      ! the nearest integer, so in cases where size(input) * dropout_rate is
      ! not an integer, the actual dropout rate will be slightly lower.
      self % mask = 1
      self % mask(:int(size(self % mask) * self % dropout_rate)) = 0
      call shuffle(self % mask)

      ! Apply the dropout mask and rescale the surviving activations
      self % output = input * self % mask * self % scale

    else

      ! In inference mode, don't apply dropout; simply pass the input through
      self % output = input

    end if

  end subroutine forward


  pure module subroutine backward(self, gradient)
    class(dropout_layer), intent(in out) :: self
    real, intent(in) :: gradient(:)
    self % gradient = gradient * self % mask * self % scale
  end subroutine backward

end submodule nf_dropout_layer_submodule
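For illustration (not part of the commit), a self-contained sketch of the masking and inverted scaling that forward implements, with a local Fisher-Yates shuffle standing in for nf_random's shuffle. On average the output sum matches the input sum, and the realized dropout rate is floor(n * rate) / n:

program dropout_scaling_demo
  implicit none
  integer, parameter :: n = 1000
  real, parameter :: rate = 0.25
  real :: x(n), mask(n), y(n), scale, u, tmp
  integer :: i, j

  call random_number(x)

  ! Zero the first n * rate elements (rounded down), leave the rest at 1
  mask = 1
  mask(:int(n * rate)) = 0

  ! Shuffle the mask in place (Fisher-Yates), as nf_random's shuffle would
  do i = n, 2, -1
    call random_number(u)
    j = 1 + int(u * i)
    tmp = mask(i); mask(i) = mask(j); mask(j) = tmp
  end do

  ! Scale survivors by 1 / (1 - rate) so the expected sum is preserved
  scale = 1 / (1 - rate)
  y = x * mask * scale

  print *, 'input sum          :', sum(x)
  print *, 'output sum         :', sum(y)              ! close to the input sum
  print *, 'realized drop rate :', 1 - sum(mask) / n   ! floor(n * rate) / n

end program dropout_scaling_demo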