Dropout layer (modern-fortran#194)
* First stab at dropout; conflict with base type TODO

* Partial dropout integration

* Test uninitialized dropout layer

* Test dropout state that follows an input layer

* Enable forward pass for dropout; backward pass TODO

* Version bump and add dropout to the features table

* Add dropout to CMake

* Enable preprocessing in fpm.toml (needed with recent versions of fpm)

* Small change in scale implementation

* Integration of backward pass for dropout

* Reduce tolerance in conv2d convergence tests

* Fix bug in dropout scaling

Co-authored-by: Ricardo Orsi <@ricor07>

* Disable dropout in inference mode (net % predict); TODO enable in net % train

* Set dropout's training mode to true in net % train(); add tests

* WIP dropout tests

* Dropout layers are always in training mode, except when `net % predict()` is called, in which case they run in inference mode

* Update the layers table

* Ensure the actual dropout rate == requested dropout rate in most cases

* Accumulate the gradient in dropout % backward and flush in network % update

* Guard against bad dropout rate

* Connect the backward pass; expand tests

* Expand tests

* Use the reference scaling in dropout; don't accumulate gradients because it's not needed

* Add dropout to MNIST example; small model changes

* Add reference

* Update print_info dropout

* Update print_info

* Compute scale once in dropout constructor

* dropout % backward() doesn't need input from the previous layer

* Timing info of dropout

---------

Co-authored-by: Vandenplas, Jeremie <[email protected]>
milancurcic and Vandenplas, Jeremie authored Feb 21, 2025
1 parent c316ee1 commit 039638d
Showing 16 changed files with 582 additions and 34 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -55,6 +55,8 @@ add_library(neural-fortran
src/nf/nf_reshape_layer_submodule.f90
src/nf/io/nf_io_binary.f90
src/nf/io/nf_io_binary_submodule.f90
src/nf/nf_dropout_layer.f90
src/nf/nf_dropout_layer_submodule.f90
)

target_link_libraries(neural-fortran PRIVATE)
5 changes: 3 additions & 2 deletions README.md
@@ -30,11 +30,12 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
| Layer type | Constructor name | Supported input layers | Rank of output array | Forward pass | Backward pass |
|------------|------------------|------------------------|----------------------|--------------|---------------|
| Input | `input` | n/a | 1, 2, 3 | n/a | n/a |
| Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ |
| Dense (fully-connected) | `dense` | `input1d`, `dense`, `dropout`, `flatten` | 1 | ✅ | ✅ |
| Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
| Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
| Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
| Linear (2-d) | `linear2d` | `input2d` | 2 | ✅ | ✅ |
| Linear (2-d) | `linear2d` | `input2d`, `linear2d` | 2 | ✅ | ✅ |
| Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |

(*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset.
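
For orientation, here is a minimal sketch (not part of this diff; the layer sizes and rate are arbitrary) of how the new `dropout` constructor composes with `dense` layers, mirroring the MNIST example updated below:

```fortran
program dropout_usage_sketch
  use nf, only: dense, dropout, input, network, relu, softmax
  implicit none
  type(network) :: net
  ! Dropout between two dense layers: during training it zeroes ~20% of the
  ! activations and rescales the rest; during net % predict it passes
  ! activations through unchanged.
  net = network([ &
    input(784), &
    dense(64, relu()), &
    dropout(0.2), &
    dense(10, softmax()) &
  ])
end program dropout_usage_sketch
```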
9 changes: 5 additions & 4 deletions example/dense_mnist.f90
@@ -1,6 +1,6 @@
program dense_mnist

use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr
use nf, only: dense, input, network, sgd, label_digits, load_mnist, corr, relu, softmax, dropout

implicit none

@@ -17,8 +17,9 @@ program dense_mnist

net = network([ &
input(784), &
dense(30), &
dense(10) &
dense(64, relu()), &
dropout(0.2), &
dense(10, softmax()) &
])
num_epochs = 10

@@ -32,7 +33,7 @@ program dense_mnist
call net % train( &
training_images, &
label_digits(training_labels), &
batch_size=100, &
batch_size=128, &
epochs=1, &
optimizer=sgd(learning_rate=3.) &
)
2 changes: 1 addition & 1 deletion src/nf.f90
@@ -3,7 +3,7 @@ module nf
use nf_datasets_mnist, only: label_digits, load_mnist
use nf_layer, only: layer
use nf_layer_constructors, only: &
conv2d, dense, flatten, input, maxpool2d, reshape, linear2d
conv2d, dense, dropout, flatten, input, linear2d, maxpool2d, reshape
use nf_loss, only: mse, quadratic
use nf_metrics, only: corr, maxabs
use nf_network, only: network
83 changes: 83 additions & 0 deletions src/nf/nf_dropout_layer.f90
@@ -0,0 +1,83 @@
module nf_dropout_layer

!! Dropout layer by Srivastava et al. (2014).
!!
!! Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I. and
!! Salakhutdinov, R., 2014. Dropout: a simple way to prevent neural networks
!! from overfitting. The Journal of Machine Learning Research, 15(1),
!! pp.1929-1958.

use nf_base_layer, only: base_layer

implicit none

private
public :: dropout_layer

type, extends(base_layer) :: dropout_layer
!! Concrete implementation of a dropout layer type

integer :: input_size = 0

real, allocatable :: output(:)
real, allocatable :: gradient(:)
real, allocatable :: mask(:) ! binary mask for dropout

real :: dropout_rate ! probability of dropping a neuron
real :: scale ! scale factor to preserve the input sum
logical :: training = .true. ! set to .false. for inference

contains

procedure :: backward
procedure :: forward
procedure :: init

end type dropout_layer

interface dropout_layer
module function dropout_layer_cons(rate) &
result(res)
!! This function returns the `dropout_layer` instance.
real, intent(in) :: rate
!! Dropout rate
type(dropout_layer) :: res
!! dropout_layer instance
end function dropout_layer_cons
end interface dropout_layer

interface

pure module subroutine backward(self, gradient)
!! Apply the backward pass to compute the gradient with respect to
!! the layer input. The dropout layer has no weights or biases to
!! update; the incoming gradient is masked and scaled.
class(dropout_layer), intent(in out) :: self
!! Dropout layer instance
real, intent(in) :: gradient(:)
!! Gradient from the next layer
end subroutine backward

module subroutine forward(self, input)
!! Propagate forward the layer.
!! Calling this subroutine updates the values of a few data components
!! of `dropout_layer` that are needed for the backward pass.
class(dropout_layer), intent(in out) :: self
!! Dropout layer instance
real, intent(in) :: input(:)
!! Input from the previous layer
end subroutine forward

module subroutine init(self, input_shape)
!! Initialize the layer data structures.
!!
!! This is a deferred procedure from the `base_layer` abstract type.
class(dropout_layer), intent(in out) :: self
!! Dropout layer instance
integer, intent(in) :: input_shape(:)
!! Shape of the input layer
end subroutine init

end interface

end module nf_dropout_layer
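
A minimal sketch (not part of this commit) of driving the concrete `dropout_layer` type through the interfaces declared above; in normal use it is wrapped in the generic `layer` type via the `dropout` constructor shown further down:

```fortran
program dropout_layer_sketch
  use nf_dropout_layer, only: dropout_layer
  implicit none
  type(dropout_layer) :: drop
  real :: x(4), dy(4)
  drop = dropout_layer(0.5)  ! stores the rate and scale = 1 / (1 - 0.5)
  call drop % init([4])      ! allocates output, gradient, and mask
  x = [1., 2., 3., 4.]
  call drop % forward(x)     ! zeroes int(4 * 0.5) = 2 elements, scales the rest by 2
  dy = 1.
  call drop % backward(dy)   ! gradient = dy * mask * scale
  print *, drop % output
  print *, drop % gradient
end program dropout_layer_sketch
```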
68 changes: 68 additions & 0 deletions src/nf/nf_dropout_layer_submodule.f90
@@ -0,0 +1,68 @@
submodule (nf_dropout_layer) nf_dropout_layer_submodule
use nf_random, only: shuffle
!! This submodule implements the procedures defined in the
!! nf_dropout_layer module.

contains

module function dropout_layer_cons(rate) result(res)
real, intent(in) :: rate
type(dropout_layer) :: res
res % dropout_rate = rate
res % scale = 1 / (1 - rate)
end function dropout_layer_cons


module subroutine init(self, input_shape)
class(dropout_layer), intent(in out) :: self
integer, intent(in) :: input_shape(:)

self % input_size = input_shape(1)

! Allocate arrays
allocate(self % output(self % input_size))
allocate(self % gradient(self % input_size))
allocate(self % mask(self % input_size))

! Initialize arrays
self % output = 0
self % gradient = 0
self % mask = 1 ! Default mask is all ones (no dropout)

end subroutine init


module subroutine forward(self, input)
class(dropout_layer), intent(in out) :: self
real, intent(in) :: input(:)

! Generate random mask for dropout, training mode only
if (self % training) then

! Set the first size(mask) * dropout_rate elements to 0, the rest to 1,
! and shuffle. Note that the selection of the elements rounds down to
! the nearest integer, so in cases where size(input) * dropout_rate is
! not an integer, the actual dropout rate will be slightly lower.
self % mask = 1
self % mask(:int(size(self % mask) * self % dropout_rate)) = 0
call shuffle(self % mask)

! Apply dropout mask
self % output = input * self % mask * self % scale

else
! In inference mode, we don't apply dropout; simply pass through the input
self % output = input

end if

end subroutine forward


pure module subroutine backward(self, gradient)
class(dropout_layer), intent(in out) :: self
real, intent(in) :: gradient(:)
self % gradient = gradient * self % mask * self % scale
end subroutine backward

end submodule nf_dropout_layer_submodule
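
To make the scaling concrete: with `rate = 0.2` and 10 inputs, `int(10 * 0.2) = 2` mask elements are zeroed and the survivors are multiplied by `scale = 1 / (1 - 0.2) = 1.25`, so each element keeps its expected value, `E[output_i] = (8/10) * 1.25 * input_i = input_i`. When `size(input) * dropout_rate` is not an integer, the truncation noted in the comment above makes the realized rate slightly lower than requested and the expectation is preserved only approximately.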
2 changes: 1 addition & 1 deletion src/nf/nf_layer.f90
@@ -91,7 +91,7 @@ end subroutine backward_3d

interface

pure module subroutine forward(self, input)
module subroutine forward(self, input)
!! Apply a forward pass on the layer.
!! This changes the internal state of the layer.
!! This is normally called internally by the `network % forward`
20 changes: 19 additions & 1 deletion src/nf/nf_layer_constructors.f90
@@ -8,7 +8,7 @@ module nf_layer_constructors
implicit none

private
public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d
public :: conv2d, dense, dropout, flatten, input, linear2d, maxpool2d, reshape

interface input

@@ -104,6 +104,24 @@ module function dense(layer_size, activation) result(res)
!! Resulting layer instance
end function dense

module function dropout(rate) result(res)
!! Create a dropout layer with a given dropout rate.
!!
!! This layer is for randomly disabling neurons during training.
!!
!! Example:
!!
!! ```
!! use nf, only: dropout, layer
!! type(layer) :: dropout_layer
!! dropout_layer = dropout(rate=0.5)
!! ```
real, intent(in) :: rate
!! Dropout rate - fraction of neurons to randomly disable during training
type(layer) :: res
!! Resulting layer instance
end function dropout

module function flatten() result(res)
!! Flatten (3-d -> 1-d) layer constructor.
!!
12 changes: 11 additions & 1 deletion src/nf/nf_layer_constructors_submodule.f90
@@ -3,6 +3,7 @@
use nf_layer, only: layer
use nf_conv2d_layer, only: conv2d_layer
use nf_dense_layer, only: dense_layer
use nf_dropout_layer, only: dropout_layer
use nf_flatten_layer, only: flatten_layer
use nf_input1d_layer, only: input1d_layer
use nf_input2d_layer, only: input2d_layer
@@ -65,14 +66,23 @@ module function dense(layer_size, activation) result(res)
end function dense


module function dropout(rate) result(res)
real, intent(in) :: rate
type(layer) :: res
if (rate < 0 .or. rate > 1) &
error stop 'rate must be between 0 and 1 in a dropout layer'
res % name = 'dropout'
allocate(res % p, source=dropout_layer(rate))
end function dropout


module function flatten() result(res)
type(layer) :: res
res % name = 'flatten'
allocate(res % p, source=flatten_layer())
end function flatten



module function input1d(layer_size) result(res)
integer, intent(in) :: layer_size
type(layer) :: res