From 467813541a3c15222f4cc9bec9ef7f1b650b17b6 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:25:43 -0400 Subject: [PATCH 01/13] Initial commit of the refactored library --- src/mod_activation.f90 | 101 ------ src/mod_activation_submodule.f90 | 77 ----- src/mod_io.f90 | 29 -- src/mod_kinds.f90 | 24 -- src/mod_layer.f90 | 100 ------ src/mod_layer_submodule.f90 | 106 ------- src/mod_mnist.f90 | 43 --- src/mod_mnist_submodule.f90 | 118 ------- src/mod_network.f90 | 204 ------------ src/mod_network_submodule.f90 | 296 ------------------ src/mod_parallel_submodule.f90 | 28 -- src/mod_random_submodule.f90 | 24 -- src/nf.f90 | 6 + src/nf_activation.f90 | 155 +++++++++ src/nf_base_layer.f90 | 53 ++++ src/nf_base_layer_submodule.f90 | 73 +++++ src/nf_conv2d_layer.f90 | 84 +++++ src/nf_datasets_mnist.f90 | 47 +++ src/nf_datasets_mnist_submodule.f90 | 132 ++++++++ src/nf_dense_layer.f90 | 97 ++++++ src/nf_dense_layer_submodule.f90 | 97 ++++++ src/nf_input1d_layer.f90 | 50 +++ src/nf_input1d_layer_submodule.f90 | 23 ++ src/nf_input3d_layer.f90 | 48 +++ src/nf_input3d_layer_submodule.f90 | 23 ++ src/nf_io.f90 | 42 +++ ...d_io_submodule.f90 => nf_io_submodule.f90} | 24 +- src/nf_layer.f90 | 101 ++++++ src/nf_layer_constructors.f90 | 116 +++++++ src/nf_layer_constructors_submodule.f90 | 75 +++++ src/nf_layer_submodule.f90 | 116 +++++++ src/nf_loss.f90 | 42 +++ src/nf_loss_submodule.f90 | 21 ++ src/nf_network.f90 | 113 +++++++ src/nf_network_submodule.f90 | 176 +++++++++++ src/nf_optimizers.f90 | 17 + src/{mod_parallel.f90 => nf_parallel.f90} | 11 +- src/nf_parallel_submodule.f90 | 25 ++ src/{mod_random.f90 => nf_random.f90} | 20 +- src/nf_random_submodule.f90 | 26 ++ 40 files changed, 1785 insertions(+), 1178 deletions(-) delete mode 100644 src/mod_activation.f90 delete mode 100644 src/mod_activation_submodule.f90 delete mode 100644 src/mod_io.f90 delete mode 100644 src/mod_kinds.f90 delete mode 100644 src/mod_layer.f90 delete mode 100644 
src/mod_layer_submodule.f90 delete mode 100644 src/mod_mnist.f90 delete mode 100644 src/mod_mnist_submodule.f90 delete mode 100644 src/mod_network.f90 delete mode 100644 src/mod_network_submodule.f90 delete mode 100644 src/mod_parallel_submodule.f90 delete mode 100644 src/mod_random_submodule.f90 create mode 100644 src/nf.f90 create mode 100644 src/nf_activation.f90 create mode 100644 src/nf_base_layer.f90 create mode 100644 src/nf_base_layer_submodule.f90 create mode 100644 src/nf_conv2d_layer.f90 create mode 100644 src/nf_datasets_mnist.f90 create mode 100644 src/nf_datasets_mnist_submodule.f90 create mode 100644 src/nf_dense_layer.f90 create mode 100644 src/nf_dense_layer_submodule.f90 create mode 100644 src/nf_input1d_layer.f90 create mode 100644 src/nf_input1d_layer_submodule.f90 create mode 100644 src/nf_input3d_layer.f90 create mode 100644 src/nf_input3d_layer_submodule.f90 create mode 100644 src/nf_io.f90 rename src/{mod_io_submodule.f90 => nf_io_submodule.f90} (66%) create mode 100644 src/nf_layer.f90 create mode 100644 src/nf_layer_constructors.f90 create mode 100644 src/nf_layer_constructors_submodule.f90 create mode 100644 src/nf_layer_submodule.f90 create mode 100644 src/nf_loss.f90 create mode 100644 src/nf_loss_submodule.f90 create mode 100644 src/nf_network.f90 create mode 100644 src/nf_network_submodule.f90 create mode 100644 src/nf_optimizers.f90 rename src/{mod_parallel.f90 => nf_parallel.f90} (58%) create mode 100644 src/nf_parallel_submodule.f90 rename src/{mod_random.f90 => nf_random.f90} (59%) create mode 100644 src/nf_random_submodule.f90 diff --git a/src/mod_activation.f90 b/src/mod_activation.f90 deleted file mode 100644 index 4aa19317..00000000 --- a/src/mod_activation.f90 +++ /dev/null @@ -1,101 +0,0 @@ -module mod_activation - - !! A collection of activation functions and their derivatives. 
- - use mod_kinds, only: ik, rk - - implicit none - - private - - public :: activation_function - public :: gaussian, gaussian_prime - public :: relu, relu_prime - public :: sigmoid, sigmoid_prime - public :: step, step_prime - public :: tanhf, tanh_prime - - interface - - pure function activation_function(x) - import :: rk - real(rk), intent(in) :: x(:) - real(rk) :: activation_function(size(x)) - end function activation_function - - pure module function gaussian(x) result(res) - !! Gaussian activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function gaussian - - pure module function gaussian_prime(x) result(res) - !! First derivative of the Gaussian activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function gaussian_prime - - pure module function relu(x) result(res) - !! REctified Linear Unit (RELU) activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function relu - - pure module function relu_prime(x) result(res) - !! First derivative of the REctified Linear Unit (RELU) activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function relu_prime - - pure module function sigmoid(x) result(res) - !! Sigmoid activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function sigmoid - - pure module function sigmoid_prime(x) result(res) - !! First derivative of the sigmoid activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function sigmoid_prime - - pure module function step(x) result(res) - !! Step activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function step - - pure module function step_prime(x) result(res) - !! First derivative of the step activation function. 
- implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function step_prime - - pure module function tanhf(x) result(res) - !! Tangent hyperbolic activation function. - !! Same as the intrinsic tanh, but must be - !! defined here so that we can use procedure - !! pointer with it. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function tanhf - - pure module function tanh_prime(x) result(res) - !! First derivative of the tanh activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function tanh_prime - - end interface - -end module mod_activation diff --git a/src/mod_activation_submodule.f90 b/src/mod_activation_submodule.f90 deleted file mode 100644 index c01fa2f9..00000000 --- a/src/mod_activation_submodule.f90 +++ /dev/null @@ -1,77 +0,0 @@ -submodule(mod_activation) mod_activation_submodule - - !! A collection of activation functions and their derivatives. - - implicit none - -contains - - pure module function gaussian(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = exp(-x**2) - end function gaussian - - pure module function gaussian_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = -2 * x * gaussian(x) - end function gaussian_prime - - pure module function relu(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = max(0., x) - end function relu - - pure module function relu_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - where (x > 0) - res = 1 - elsewhere - res = 0 - end where - end function relu_prime - - pure module function sigmoid(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = 1 / (1 + exp(-x)) - endfunction sigmoid - - pure module function sigmoid_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = sigmoid(x) * (1 - sigmoid(x)) - end function sigmoid_prime - - 
pure module function step(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - where (x > 0) - res = 1 - elsewhere - res = 0 - end where - end function step - - pure module function step_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = 0 - end function step_prime - - pure module function tanhf(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = tanh(x) - end function tanhf - - pure module function tanh_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = 1 - tanh(x)**2 - end function tanh_prime - -end submodule mod_activation_submodule diff --git a/src/mod_io.f90 b/src/mod_io.f90 deleted file mode 100644 index 0d40ad57..00000000 --- a/src/mod_io.f90 +++ /dev/null @@ -1,29 +0,0 @@ -module mod_io - - use mod_kinds, only: ik, rk - - implicit none - - private - - public :: read_binary_file - - interface read_binary_file - - module subroutine read_binary_file_1d(filename, dtype, nrec, array) - implicit none - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, nrec - real(rk), allocatable, intent(in out) :: array(:) - end subroutine read_binary_file_1d - - module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) - implicit none - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, dsize, nrec - real(rk), allocatable, intent(in out) :: array(:,:) - end subroutine read_binary_file_2d - - end interface read_binary_file - -end module mod_io diff --git a/src/mod_kinds.f90 b/src/mod_kinds.f90 deleted file mode 100644 index f39b62c7..00000000 --- a/src/mod_kinds.f90 +++ /dev/null @@ -1,24 +0,0 @@ -module mod_kinds - - use iso_fortran_env, only: int32, int64, real32, real64, real128 - - implicit none - - private - public :: ik, rk - -#ifdef REAL64 - integer,parameter :: rk = real64 -#elif REAL128 - integer,parameter :: rk = real128 -#else - integer,parameter :: rk = real32 -#endif - -#ifdef 
INT64 - integer, parameter :: ik = int64 -#else - integer, parameter :: ik = int32 -#endif - -end module mod_kinds diff --git a/src/mod_layer.f90 b/src/mod_layer.f90 deleted file mode 100644 index e7244241..00000000 --- a/src/mod_layer.f90 +++ /dev/null @@ -1,100 +0,0 @@ -module mod_layer - - !! Defines the layer type and its methods. - - use mod_activation - use mod_kinds, only: ik, rk - - implicit none - - private - public :: array1d, array2d, db_init, db_co_sum, dw_init, dw_co_sum, layer_type - - type :: layer_type - real(rk), allocatable :: a(:) !! activations - real(rk), allocatable :: b(:) !! biases - real(rk), allocatable :: w(:,:) !! weights - real(rk), allocatable :: z(:) !! arg. to activation function - procedure(activation_function), pointer, nopass :: activation => null() - procedure(activation_function), pointer, nopass :: activation_prime => null() - character(len=:), allocatable :: activation_str !! activation character string - contains - procedure, public, pass(self) :: set_activation - end type layer_type - - type :: array1d - real(rk), allocatable :: array(:) - end type array1d - - type :: array2d - real(rk), allocatable :: array(:,:) - end type array2d - - interface layer_type - module function constructor(this_size, next_size) result(layer) - !! Layer class constructor. this_size is the number of neurons in the layer. - !! next_size is the number of neurons in the next layer, used to allocate - !! the weights. - implicit none - integer(ik), intent(in) :: this_size, next_size - type(layer_type) layer - end function constructor - end interface layer_type - - interface array1d - pure module function array1d_constructor(length) result(a) - !! Overloads the default type constructor. - implicit none - integer(ik), intent(in) :: length - type(array1d) :: a - end function array1d_constructor - end interface array1d - - interface array2d - pure module function array2d_constructor(dims) result(a) - !! Overloads the default type constructor. 
- integer(ik), intent(in) :: dims(2) - type(array2d) :: a - end function array2d_constructor - end interface array2d - - interface - - pure module subroutine db_init(db, dims) - !! Initialises biases structure. - implicit none - type(array1d), allocatable, intent(in out) :: db(:) - integer(ik), intent(in) :: dims(:) - end subroutine db_init - - pure module subroutine dw_init(dw, dims) - !! Initialises weights structure. - implicit none - type(array2d), allocatable, intent(in out) :: dw(:) - integer(ik), intent(in) :: dims(:) - end subroutine dw_init - - module subroutine db_co_sum(db) - !! Performs a collective sum of bias tendencies. - implicit none - type(array1d), allocatable, intent(in out) :: db(:) - end subroutine db_co_sum - - module subroutine dw_co_sum(dw) - !! Performs a collective sum of weights tendencies. - implicit none - type(array2d), allocatable, intent(in out) :: dw(:) - end subroutine dw_co_sum - - pure elemental module subroutine set_activation(self, activation) - !! Sets the activation function. Input string must match one of - !! provided activation functions, otherwise it defaults to sigmoid. - !! If activation not present, defaults to sigmoid. 
- implicit none - class(layer_type), intent(in out) :: self - character(len=*), intent(in) :: activation - end subroutine set_activation - - end interface - -end module mod_layer diff --git a/src/mod_layer_submodule.f90 b/src/mod_layer_submodule.f90 deleted file mode 100644 index 514542e0..00000000 --- a/src/mod_layer_submodule.f90 +++ /dev/null @@ -1,106 +0,0 @@ -submodule(mod_layer) mod_layer_submodule - - use mod_random, only: randn - - implicit none - -contains - - module function constructor(this_size, next_size) result(layer) - integer(ik), intent(in) :: this_size, next_size - type(layer_type) :: layer - allocate(layer % a(this_size)) - allocate(layer % z(this_size)) - layer % a = 0 - layer % z = 0 - layer % w = randn(this_size, next_size) / this_size - layer % b = randn(this_size) - end function constructor - - pure module function array1d_constructor(length) result(a) - integer(ik), intent(in) :: length - type(array1d) :: a - allocate(a % array(length)) - a % array = 0 - end function array1d_constructor - - pure module function array2d_constructor(dims) result(a) - integer(ik), intent(in) :: dims(2) - type(array2d) :: a - allocate(a % array(dims(1), dims(2))) - a % array = 0 - end function array2d_constructor - - pure module subroutine db_init(db, dims) - type(array1d), allocatable, intent(in out) :: db(:) - integer(ik), intent(in) :: dims(:) - integer(ik) :: n, nm - nm = size(dims) - allocate(db(nm)) - do n = 1, nm - 1 - db(n) = array1d(dims(n)) - end do - db(n) = array1d(dims(n)) - end subroutine db_init - - pure module subroutine dw_init(dw, dims) - type(array2d), allocatable, intent(in out) :: dw(:) - integer(ik), intent(in) :: dims(:) - integer(ik) :: n, nm - nm = size(dims) - allocate(dw(nm)) - do n = 1, nm - 1 - dw(n) = array2d(dims(n:n+1)) - end do - dw(n) = array2d([dims(n), 1]) - end subroutine dw_init - - module subroutine db_co_sum(db) - type(array1d), allocatable, intent(in out) :: db(:) - integer(ik) :: n - do n = 2, size(db) - call 
co_sum(db(n) % array) - end do - end subroutine db_co_sum - - module subroutine dw_co_sum(dw) - type(array2d), allocatable, intent(in out) :: dw(:) - integer(ik) :: n - do n = 1, size(dw) - 1 - call co_sum(dw(n) % array) - end do - end subroutine dw_co_sum - - pure elemental module subroutine set_activation(self, activation) - class(layer_type), intent(in out) :: self - character(len=*), intent(in) :: activation - select case(trim(activation)) - case('gaussian') - self % activation => gaussian - self % activation_prime => gaussian_prime - self % activation_str = 'gaussian' - case('relu') - self % activation => relu - self % activation_prime => relu_prime - self % activation_str = 'relu' - case('sigmoid') - self % activation => sigmoid - self % activation_prime => sigmoid_prime - self % activation_str = 'sigmoid' - case('step') - self % activation => step - self % activation_prime => step_prime - self % activation_str = 'step' - case('tanh') - self % activation => tanhf - self % activation_prime => tanh_prime - self % activation_str = 'tanh' - case default - self % activation => sigmoid - self % activation_prime => sigmoid_prime - self % activation_str = 'sigmoid' - end select - end subroutine set_activation - - -end submodule mod_layer_submodule diff --git a/src/mod_mnist.f90 b/src/mod_mnist.f90 deleted file mode 100644 index e8af54cb..00000000 --- a/src/mod_mnist.f90 +++ /dev/null @@ -1,43 +0,0 @@ -module mod_mnist - - !! Procedures to work with MNIST dataset, usable with data format - !! as provided in this repo and not the original data format (idx). - - use mod_kinds, only: ik, rk - - implicit none - - private - - public :: label_digits, load_mnist, print_image - - interface - - pure module function label_digits(labels) result(res) - !! Converts an array of MNIST labels into a form - !! that can be input to the network_type instance. 
- implicit none - real(rk), intent(in) :: labels(:) - real(rk) :: res(10, size(labels)) - end function label_digits - - module subroutine load_mnist(tr_images, tr_labels, te_images,& - - te_labels, va_images, va_labels) - !! Loads the MNIST dataset into arrays. - implicit none - real(rk), allocatable, intent(in out) :: tr_images(:,:), tr_labels(:) - real(rk), allocatable, intent(in out) :: te_images(:,:), te_labels(:) - real(rk), allocatable, intent(in out), optional :: va_images(:,:), va_labels(:) - end subroutine load_mnist - - module subroutine print_image(images, labels, n) - !! Prints a single image and label to screen. - implicit none - real(rk), intent(in) :: images(:,:), labels(:) - integer(ik), intent(in) :: n - end subroutine print_image - - end interface - -end module mod_mnist diff --git a/src/mod_mnist_submodule.f90 b/src/mod_mnist_submodule.f90 deleted file mode 100644 index 9812f197..00000000 --- a/src/mod_mnist_submodule.f90 +++ /dev/null @@ -1,118 +0,0 @@ -submodule(mod_mnist) mod_mnist_submodule - - !! Procedures to work with MNIST dataset, usable with data format - !! as provided in this repo and not the original data format (idx). - - ! 
TODO make MNIST work with arbitrary precision - - use mod_io, only: read_binary_file - use mod_kinds, only: ik, rk - - implicit none - - integer, parameter :: message_len = 128 - -contains - - subroutine download_and_uncompress() - character(len=*), parameter :: download_mechanism = 'curl -LO ' - character(len=*), parameter :: base_url='https://github.com/modern-fortran/neural-fortran/files/8498876/' - character(len=*), parameter :: download_filename = 'mnist.tar.gz' - character(len=*), parameter :: download_command = download_mechanism // base_url // download_filename - character(len=*), parameter :: uncompress_file = 'tar xvzf ' // download_filename - character(len=message_len) :: command_message - character(len=:), allocatable :: error_message - integer :: exit_status, command_status - - exit_status=0 - call execute_command_line(command=download_command, wait=.true., & - exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) - - if (any([exit_status, command_status] /= 0)) then - error_message = 'command "' // download_command // '" failed' - if (command_status /= 0) error_message = error_message // " with message " // trim(command_message) - error stop error_message - end if - - call execute_command_line(command=uncompress_file, wait=.true., & - exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) - - if (any([exit_status, command_status] /= 0)) then - error_message = 'command "' // uncompress_file // '" failed' - if (command_status /= 0) error_message = error_message // " with message " // trim(command_message) - error stop error_message - end if - - end subroutine download_and_uncompress - - pure module function label_digits(labels) result(res) - real(rk), intent(in) :: labels(:) - real(rk) :: res(10, size(labels)) - integer(ik) :: i - do i = 1, size(labels) - res(:,i) = digits(labels(i)) - end do - contains - pure function digits(x) - !! Returns an array of 10 reals, with zeros everywhere - !! 
and a one corresponding to the input number, for example: - !! digits(0) = [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.] - !! digits(1) = [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.] - !! digits(6) = [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.] - real(rk), intent(in) :: x - real(rk) :: digits(10) - digits = 0 - digits(int(x + 1)) = 1 - end function digits - end function label_digits - - module subroutine load_mnist(tr_images, tr_labels, te_images,& - te_labels, va_images, va_labels) - real(rk), allocatable, intent(in out) :: tr_images(:,:), tr_labels(:) - real(rk), allocatable, intent(in out) :: te_images(:,:), te_labels(:) - real(rk), allocatable, intent(in out), optional :: va_images(:,:), va_labels(:) - integer(ik), parameter :: dtype = 4, image_size = 784 - integer(ik), parameter :: tr_nimages = 50000 - integer(ik), parameter :: te_nimages = 10000 - integer(ik), parameter :: va_nimages = 10000 - logical :: file_exists - - ! Check if MNIST data is present and download it if not. - inquire(file='mnist_training_images.dat', exist=file_exists) - if (.not. file_exists) call download_and_uncompress() - - call read_binary_file('mnist_training_images.dat',& - dtype, image_size, tr_nimages, tr_images) - call read_binary_file('mnist_training_labels.dat',& - dtype, tr_nimages, tr_labels) - - call read_binary_file('mnist_testing_images.dat',& - dtype, image_size, te_nimages, te_images) - call read_binary_file('mnist_testing_labels.dat',& - dtype, te_nimages, te_labels) - - if (present(va_images) .and. 
present(va_labels)) then - call read_binary_file('mnist_validation_images.dat',& - dtype, image_size, va_nimages, va_images) - call read_binary_file('mnist_validation_labels.dat',& - dtype, va_nimages, va_labels) - end if - - end subroutine load_mnist - - module subroutine print_image(images, labels, n) - real(rk), intent(in) :: images(:,:), labels(:) - integer(ik), intent(in) :: n - real(rk) :: image(28, 28) - character(len=1) :: char_image(28, 28) - integer(ik) i, j - image = reshape(images(:,n), [28, 28]) - char_image = '.' - where (image > 0) char_image = '#' - print *, labels(n) - do j = 1, 28 - print *, char_image(:,j) - end do - end subroutine print_image - -end submodule mod_mnist_submodule diff --git a/src/mod_network.f90 b/src/mod_network.f90 deleted file mode 100644 index 7e8d777a..00000000 --- a/src/mod_network.f90 +++ /dev/null @@ -1,204 +0,0 @@ -module mod_network - - use mod_kinds, only: ik, rk - use mod_layer, only: array1d, array2d, layer_type - - implicit none - - private - public :: network_type - - type :: network_type - - type(layer_type), allocatable :: layers(:) - integer(ik), allocatable :: dims(:) - - contains - - procedure, public, pass(self) :: accuracy - procedure, public, pass(self) :: backprop - procedure, public, pass(self) :: fwdprop - procedure, public, pass(self) :: init - procedure, public, pass(self) :: load - procedure, public, pass(self) :: loss - procedure, public, pass(self) :: output_batch - procedure, public, pass(self) :: output_single - procedure, public, pass(self) :: save - procedure, public, pass(self) :: set_activation_equal - procedure, public, pass(self) :: set_activation_layers - procedure, public, pass(self) :: sync - procedure, public, pass(self) :: train_batch - procedure, public, pass(self) :: train_epochs - procedure, public, pass(self) :: train_single - procedure, public, pass(self) :: update - - generic, public :: output => output_batch, output_single - generic, public :: set_activation => 
set_activation_equal, set_activation_layers - generic, public :: train => train_batch, train_epochs, train_single - - end type network_type - - interface network_type - - module function net_constructor(dims, activation) result(net) - !! Network class constructor. Size of input array dims indicates the total - !! number of layers (input + hidden + output), and the value of its elements - !! corresponds the size of each layer. - implicit none - integer(ik), intent(in) :: dims(:) - character(len=*), intent(in), optional :: activation - type(network_type) :: net - end function net_constructor - - end interface network_type - - interface - - pure real(rk) module function accuracy(self, x, y) - !! Given input x and output y, evaluates the position of the - !! maximum value of the output and returns the number of matches - !! relative to the size of the dataset. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:), y(:,:) - end function accuracy - - pure module subroutine backprop(self, y, dw, db) - !! Applies a backward propagation through the network - !! and returns the weight and bias gradients. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: y(:) - type(array2d), allocatable, intent(out) :: dw(:) - type(array1d), allocatable, intent(out) :: db(:) - end subroutine backprop - - - pure module subroutine fwdprop(self, x) - !! Performs the forward propagation and stores arguments to activation - !! functions and activations themselves for use in backprop. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:) - end subroutine fwdprop - - module subroutine init(self, dims) - !! Allocates and initializes the layers with given dimensions dims. - implicit none - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: dims(:) - end subroutine init - - - module subroutine load(self, filename) - !! Loads the network from file. 
- implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - end subroutine load - - - pure module real(rk) function loss(self, x, y) - !! Given input x and expected output y, returns the loss of the network. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:), y(:) - end function loss - - - pure module function output_single(self, x) result(a) - !! Use forward propagation to compute the output of the network. - !! This specific procedure is for a single sample of 1-d input data. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:) - real(rk), allocatable :: a(:) - end function output_single - - - pure module function output_batch(self, x) result(a) - !! Use forward propagation to compute the output of the network. - !! This specific procedure is for a batch of 1-d input data. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:) - real(rk), allocatable :: a(:,:) - end function output_batch - - module subroutine save(self, filename) - !! Saves the network to a file. - implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - end subroutine save - - - pure module subroutine set_activation_equal(self, activation) - !! A thin wrapper around layer % set_activation(). - !! This method can be used to set an activation function - !! for all layers at once. - implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation - end subroutine set_activation_equal - - - pure module subroutine set_activation_layers(self, activation) - !! A thin wrapper around layer % set_activation(). - !! This method can be used to set different activation functions - !! for each layer separately. 
- implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation(size(self % layers)) - end subroutine set_activation_layers - - module subroutine sync(self, image) - !! Broadcasts network weights and biases from - !! specified image to all others. - implicit none - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: image - end subroutine sync - - - module subroutine train_batch(self, x, y, eta) - !! Trains a network using input data x and output data y, - !! and learning rate eta. The learning rate is normalized - !! with the size of the data batch. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:,:), y(:,:), eta - end subroutine train_batch - - - module subroutine train_epochs(self, x, y, eta, num_epochs, batch_size) - !! Trains for num_epochs epochs with mini-bachtes of size equal to batch_size. - implicit none - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: num_epochs, batch_size - real(rk), intent(in) :: x(:,:), y(:,:), eta - end subroutine train_epochs - - - pure module subroutine train_single(self, x, y, eta) - !! Trains a network using a single set of input data x and output data y, - !! and learning rate eta. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:), y(:), eta - end subroutine train_single - - - pure module subroutine update(self, dw, db, eta) - !! Updates network weights and biases with gradients dw and db, - !! scaled by learning rate eta. 
- implicit none - class(network_type), intent(in out) :: self - class(array2d), intent(in) :: dw(:) - class(array1d), intent(in) :: db(:) - real(rk), intent(in) :: eta - end subroutine update - - end interface - -end module mod_network diff --git a/src/mod_network_submodule.f90 b/src/mod_network_submodule.f90 deleted file mode 100644 index 30757661..00000000 --- a/src/mod_network_submodule.f90 +++ /dev/null @@ -1,296 +0,0 @@ -submodule(mod_network) mod_network_submodule - - use mod_kinds, only: ik, rk - use mod_layer, only: db_init, dw_init, db_co_sum, dw_co_sum - use mod_parallel, only: tile_indices - - implicit none - -contains - - module function net_constructor(dims, activation) result(net) - integer(ik), intent(in) :: dims(:) - character(len=*), intent(in), optional :: activation - type(network_type) :: net - call net % init(dims) - if (present(activation)) then - call net % set_activation(activation) - else - call net % set_activation('sigmoid') - end if - call net % sync(1) - end function net_constructor - - pure real(rk) module function accuracy(self, x, y) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:), y(:,:) - integer(ik) :: i, good - good = 0 - do i = 1, size(x, dim=2) - if (all(maxloc(self % output(x(:,i))) == maxloc(y(:,i)))) then - good = good + 1 - end if - end do - accuracy = real(good, kind=rk) / size(x, dim=2) - end function accuracy - - - pure module subroutine backprop(self, y, dw, db) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: y(:) - type(array2d), allocatable, intent(out) :: dw(:) - type(array1d), allocatable, intent(out) :: db(:) - integer(ik) :: n, nm - - associate(dims => self % dims, layers => self % layers) - - call db_init(db, dims) - call dw_init(dw, dims) - - n = size(dims) - db(n) % array = (layers(n) % a - y) * self % layers(n) % activation_prime(layers(n) % z) - dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),& - reshape(db(n) % array, [1, dims(n)])) - - do 
n = size(dims) - 1, 2, -1 - db(n) % array = matmul(layers(n) % w, db(n+1) % array)& - * self % layers(n) % activation_prime(layers(n) % z) - dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),& - reshape(db(n) % array, [1, dims(n)])) - end do - - end associate - - end subroutine backprop - - - pure module subroutine fwdprop(self, x) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:) - integer(ik) :: n - associate(layers => self % layers) - layers(1) % a = x - do n = 2, size(layers) - layers(n) % z = matmul(transpose(layers(n-1) % w), layers(n-1) % a) + layers(n) % b - layers(n) % a = self % layers(n) % activation(layers(n) % z) - end do - end associate - end subroutine fwdprop - - - module subroutine init(self, dims) - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: dims(:) - integer(ik) :: n - self % dims = dims - if (.not. allocated(self % layers)) allocate(self % layers(size(dims))) - do n = 1, size(dims) - 1 - self % layers(n) = layer_type(dims(n), dims(n+1)) - end do - self % layers(n) = layer_type(dims(n), 1) - self % layers(1) % b = 0 - self % layers(size(dims)) % w = 0 - end subroutine init - - - module subroutine load(self, filename) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - integer(ik) :: fileunit, n, num_layers, layer_idx - integer(ik), allocatable :: dims(:) - character(len=100) :: buffer !! 
activation string - open(newunit=fileunit, file=filename, status='old', action='read') - read(fileunit, *) num_layers - allocate(dims(num_layers)) - read(fileunit, *) dims - call self % init(dims) - do n = 1, num_layers - read(fileunit, *) layer_idx, buffer - call self % layers(layer_idx) % set_activation(trim(buffer)) - end do - do n = 2, size(self % dims) - read(fileunit, *) self % layers(n) % b - end do - do n = 1, size(self % dims) - 1 - read(fileunit, *) self % layers(n) % w - end do - close(fileunit) - end subroutine load - - - pure real(rk) module function loss(self, x, y) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:), y(:) - loss = 0.5 * sum((y - self % output(x))**2) / size(x) - end function loss - - - pure module function output_single(self, x) result(a) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:) - real(rk), allocatable :: a(:) - integer(ik) :: n - associate(layers => self % layers) - a = self % layers(2) % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b) - do n = 3, size(layers) - a = self % layers(n) % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b) - end do - end associate - end function output_single - - - pure module function output_batch(self, x) result(a) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:) - real(rk), allocatable :: a(:,:) - integer(ik) :: i - allocate(a(self % dims(size(self % dims)), size(x, dim=2))) - do i = 1, size(x, dim=2) - a(:,i) = self % output_single(x(:,i)) - end do - end function output_batch - - - module subroutine save(self, filename) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - integer(ik) :: fileunit, n - open(newunit=fileunit, file=filename) - write(fileunit, fmt=*) size(self % dims) - write(fileunit, fmt=*) self % dims - do n = 1, size(self % dims) - write(fileunit, fmt=*) n, self % layers(n) % activation_str - end do - do n = 2, size(self % dims) - 
write(fileunit, fmt=*) self % layers(n) % b - end do - do n = 1, size(self % dims) - 1 - write(fileunit, fmt=*) self % layers(n) % w - end do - close(fileunit) - end subroutine save - - - pure module subroutine set_activation_equal(self, activation) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation - call self % layers(:) % set_activation(activation) - end subroutine set_activation_equal - - - pure module subroutine set_activation_layers(self, activation) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation(size(self % layers)) - call self % layers(:) % set_activation(activation) - end subroutine set_activation_layers - - module subroutine sync(self, image) - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: image - integer(ik) :: n - if (num_images() == 1) return - layers: do n = 1, size(self % dims) - call co_broadcast(self % layers(n) % b, image) - call co_broadcast(self % layers(n) % w, image) - end do layers - end subroutine sync - - module subroutine train_batch(self, x, y, eta) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:,:), y(:,:), eta - type(array1d), allocatable :: db(:), db_batch(:) - type(array2d), allocatable :: dw(:), dw_batch(:) - integer(ik) :: i, im, n, nm - integer(ik) :: is, ie, indices(2) - - im = size(x, dim=2) ! mini-batch size - nm = size(self % dims) ! number of layers - - ! 
get start and end index for mini-batch - indices = tile_indices(im) - is = indices(1) - ie = indices(2) - - call db_init(db_batch, self % dims) - call dw_init(dw_batch, self % dims) - - do concurrent(i = is:ie) - call self % fwdprop(x(:,i)) - call self % backprop(y(:,i), dw, db) - do concurrent(n = 1:nm) - dw_batch(n) % array = dw_batch(n) % array + dw(n) % array - db_batch(n) % array = db_batch(n) % array + db(n) % array - end do - end do - - if (num_images() > 1) then - call dw_co_sum(dw_batch) - call db_co_sum(db_batch) - end if - - call self % update(dw_batch, db_batch, eta / im) - - end subroutine train_batch - - module subroutine train_epochs(self, x, y, eta, num_epochs, batch_size) - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: num_epochs, batch_size - real(rk), intent(in) :: x(:,:), y(:,:), eta - - integer(ik) :: i, n, nsamples, nbatch - integer(ik) :: batch_start, batch_end - - real(rk) :: pos - - nsamples = size(y, dim=2) - nbatch = nsamples / batch_size - - epochs: do n = 1, num_epochs - batches: do i = 1, nbatch - - !pull a random mini-batch from the dataset - call random_number(pos) - batch_start = int(pos * (nsamples - batch_size + 1)) - if (batch_start == 0) batch_start = 1 - batch_end = batch_start + batch_size - 1 - - call self % train(x(:,batch_start:batch_end), y(:,batch_start:batch_end), eta) - - end do batches - end do epochs - - end subroutine train_epochs - - - pure module subroutine train_single(self, x, y, eta) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:), y(:), eta - type(array2d), allocatable :: dw(:) - type(array1d), allocatable :: db(:) - call self % fwdprop(x) - call self % backprop(y, dw, db) - call self % update(dw, db, eta) - end subroutine train_single - - - pure module subroutine update(self, dw, db, eta) - class(network_type), intent(in out) :: self - class(array2d), intent(in) :: dw(:) - class(array1d), intent(in) :: db(:) - real(rk), intent(in) :: eta - integer(ik) :: 
n - - associate(layers => self % layers, nm => size(self % dims)) - ! update biases - do concurrent(n = 2:nm) - layers(n) % b = layers(n) % b - eta * db(n) % array - end do - ! update weights - do concurrent(n = 1:nm-1) - layers(n) % w = layers(n) % w - eta * dw(n) % array - end do - end associate - - end subroutine update - -end submodule mod_network_submodule diff --git a/src/mod_parallel_submodule.f90 b/src/mod_parallel_submodule.f90 deleted file mode 100644 index 9f7cb9ce..00000000 --- a/src/mod_parallel_submodule.f90 +++ /dev/null @@ -1,28 +0,0 @@ -submodule(mod_parallel) mod_parallel_submodule - - use mod_kinds, only: ik, rk - implicit none - -contains - - pure module function tile_indices(dims) - integer(ik), intent(in) :: dims - integer(ik) :: tile_indices(2) - integer(ik) :: offset, tile_size - - tile_size = dims / num_images() - - !! start and end indices assuming equal tile sizes - tile_indices(1) = (this_image() - 1) * tile_size + 1 - tile_indices(2) = tile_indices(1) + tile_size - 1 - - !! 
if we have any remainder, distribute it to the tiles at the end - offset = num_images() - mod(dims, num_images()) - if (this_image() > offset) then - tile_indices(1) = tile_indices(1) + this_image() - offset - 1 - tile_indices(2) = tile_indices(2) + this_image() - offset - end if - - end function tile_indices - -end submodule mod_parallel_submodule diff --git a/src/mod_random_submodule.f90 b/src/mod_random_submodule.f90 deleted file mode 100644 index c75e4e7c..00000000 --- a/src/mod_random_submodule.f90 +++ /dev/null @@ -1,24 +0,0 @@ -submodule(mod_random) mod_random_submodule - implicit none - - real(rk), parameter :: pi = 4 * atan(1._rk) - -contains - - module function randn1d(n) result(r) - integer(ik), intent(in) :: n - real(rk) :: r(n), r2(n) - call random_number(r) - call random_number(r2) - r = sqrt(-2 * log(r)) * cos(2 * pi * r2) - end function randn1d - - module function randn2d(m, n) result(r) - integer(ik), intent(in) :: m, n - real(rk) :: r(m, n), r2(m, n) - call random_number(r) - call random_number(r2) - r = sqrt(-2 * log(r)) * cos(2 * pi * r2) - end function randn2d - -end submodule mod_random_submodule diff --git a/src/nf.f90 b/src/nf.f90 new file mode 100644 index 00000000..ca71a2c5 --- /dev/null +++ b/src/nf.f90 @@ -0,0 +1,6 @@ +module nf + use nf_datasets_mnist, only: label_digits, load_mnist + use nf_layer, only: layer + use nf_layer_constructors, only: dense, input + use nf_network, only: network +end module nf diff --git a/src/nf_activation.f90 b/src/nf_activation.f90 new file mode 100644 index 00000000..50f9362c --- /dev/null +++ b/src/nf_activation.f90 @@ -0,0 +1,155 @@ +module nf_activation + + ! A collection of activation functions and their derivatives. 
+ + implicit none + + private + + public :: activation_function + public :: elu, elu_prime + public :: exponential + public :: gaussian, gaussian_prime + public :: relu, relu_prime + public :: sigmoid, sigmoid_prime + public :: softplus, softplus_prime + public :: step, step_prime + public :: tanhf, tanh_prime + + interface + pure function activation_function(x) + real, intent(in) :: x(:) + real :: activation_function(size(x)) + end function activation_function + end interface + +contains + + pure function elu(x, alpha) result(res) + ! Exponential Linear Unit (ELU) activation function. + real, intent(in) :: x(:) + real, intent(in) :: alpha + real :: res(size(x)) + where (x >= 0) + res = x + elsewhere + res = alpha * (exp(x) - 1) + end where + end function elu + + pure function elu_prime(x, alpha) result(res) + ! First derivative of the Exponential Linear Unit (ELU) + ! activation function. + real, intent(in) :: x(:) + real, intent(in) :: alpha + real :: res(size(x)) + where (x >= 0) + res = 1 + elsewhere + res = alpha * exp(x) + end where + end function elu_prime + + pure function exponential(x) result(res) + ! Exponential activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = exp(x) + end function exponential + + pure function gaussian(x) result(res) + ! Gaussian activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = exp(-x**2) + end function gaussian + + pure function gaussian_prime(x) result(res) + ! First derivative of the Gaussian activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = -2 * x * gaussian(x) + end function gaussian_prime + + pure function relu(x) result(res) + !! Rectified Linear Unit (ReLU) activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = max(0., x) + end function relu + + pure function relu_prime(x) result(res) + ! First derivative of the Rectified Linear Unit (ReLU) activation function. 
+ real, intent(in) :: x(:) + real :: res(size(x)) + where (x > 0) + res = 1 + elsewhere + res = 0 + end where + end function relu_prime + + pure function sigmoid(x) result(res) + ! Sigmoid activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = 1 / (1 + exp(-x)) + endfunction sigmoid + + pure function sigmoid_prime(x) result(res) + ! First derivative of the sigmoid activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = sigmoid(x) * (1 - sigmoid(x)) + end function sigmoid_prime + + pure function softplus(x) result(res) + ! Softplus activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = log(exp(x) + 1) + end function softplus + + pure function softplus_prime(x) result(res) + ! First derivative of the softplus activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = exp(x) / (exp(x) + 1) + end function softplus_prime + + pure function step(x) result(res) + ! Step activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + where (x > 0) + res = 1 + elsewhere + res = 0 + end where + end function step + + pure function step_prime(x) result(res) + ! First derivative of the step activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = 0 + end function step_prime + + pure function tanhf(x) result(res) + ! Tangent hyperbolic activation function. + ! Same as the intrinsic tanh, but must be + ! defined here so that we can use procedure + ! pointer with it. + real, intent(in) :: x(:) + real :: res(size(x)) + res = tanh(x) + end function tanhf + + pure function tanh_prime(x) result(res) + ! First derivative of the tanh activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = 1 - tanh(x)**2 + end function tanh_prime + +end module nf_activation diff --git a/src/nf_base_layer.f90 b/src/nf_base_layer.f90 new file mode 100644 index 00000000..9a5ae1d9 --- /dev/null +++ b/src/nf_base_layer.f90 @@ -0,0 +1,53 @@ +module nf_base_layer + + !! 
This module provides the abstract base layer type, to be extended by + !! specific concrete types. + + use nf_activation, only: activation_function + + implicit none + + private + public :: base_layer + + type, abstract :: base_layer + + !! This type is the base for creating concrete layer instances. + !! Extend this type when creating other concrete layer types. + + character(:), allocatable :: activation_name + procedure(activation_function), pointer, nopass :: & + activation => null() + procedure(activation_function), pointer, nopass :: & + activation_prime => null() + + contains + + procedure(init_interface), deferred :: init + procedure :: set_activation + + end type base_layer + + abstract interface + subroutine init_interface(self, input_shape) + !! Initialize the internal layer data structures. + import :: base_layer + class(base_layer), intent(in out) :: self + !! Layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer, i.e. the layer that precedes + !! this layer + end subroutine init_interface + end interface + + interface + elemental module subroutine set_activation(self, activation) + !! Set the activation functions. + class(base_layer), intent(in out) :: self + !! Layer instance + character(*), intent(in) :: activation + !! 
String with the activation function name + end subroutine set_activation + end interface + +end module nf_base_layer diff --git a/src/nf_base_layer_submodule.f90 b/src/nf_base_layer_submodule.f90 new file mode 100644 index 00000000..99f1532c --- /dev/null +++ b/src/nf_base_layer_submodule.f90 @@ -0,0 +1,73 @@ +submodule(nf_base_layer) nf_base_layer_submodule + + use nf_activation, only: activation_function, & + elu, elu_prime, & + exponential, & + gaussian, gaussian_prime, & + relu, relu_prime, & + sigmoid, sigmoid_prime, & + softplus, softplus_prime, & + step, step_prime, & + tanhf, tanh_prime + + implicit none + +contains + + elemental module subroutine set_activation(self, activation) + class(base_layer), intent(in out) :: self + character(*), intent(in) :: activation + + select case(trim(activation)) + + ! TODO need to figure out how to handle the alpha param + !case('elu') + ! self % activation => elu + ! self % activation_prime => elu_prime + ! self % activation_name = 'elu' + + case('exponential') + self % activation => exponential + self % activation_prime => exponential + self % activation_name = 'exponential' + + case('gaussian') + self % activation => gaussian + self % activation_prime => gaussian_prime + self % activation_name = 'gaussian' + + case('relu') + self % activation => relu + self % activation_prime => relu_prime + self % activation_name = 'relu' + + case('sigmoid') + self % activation => sigmoid + self % activation_prime => sigmoid_prime + self % activation_name = 'sigmoid' + + case('softplus') + self % activation => softplus + self % activation_prime => softplus_prime + self % activation_name = 'softplus' + + case('step') + self % activation => step + self % activation_prime => step_prime + self % activation_name = 'step' + + case('tanh') + self % activation => tanhf + self % activation_prime => tanh_prime + self % activation_name = 'tanh' + + case default + error stop 'Activation must be one of: ' // & + '"elu", "exponential", "gaussian", 
"relu", ' // & + '"sigmoid", "softplus", "step", or "tanh".' + + end select + + end subroutine set_activation + +end submodule nf_base_layer_submodule diff --git a/src/nf_conv2d_layer.f90 b/src/nf_conv2d_layer.f90 new file mode 100644 index 00000000..455a811a --- /dev/null +++ b/src/nf_conv2d_layer.f90 @@ -0,0 +1,84 @@ +module nf_conv2d_layer + + !! This is a placeholder module that will later define a concrete conv2d + !! layer type. + + use nf_base_layer, only: base_layer + implicit none + + private + public :: conv2d_layer + + type, extends(base_layer) :: conv2d_layer + + integer :: width + integer :: height + integer :: channels + integer :: window_size + integer :: filters + + real, allocatable :: biases(:) ! as many as there are filters + real, allocatable :: kernel(:,:,:,:) + real, allocatable :: output(:,:,:) + + contains + + procedure :: init + procedure :: forward + procedure :: backward + + end type conv2d_layer + + interface conv2d_layer + module procedure :: conv2d_layer_cons + end interface conv2d_layer + +contains + + pure function conv2d_layer_cons(window_size, filters, activation) result(res) + integer, intent(in) :: window_size + integer, intent(in) :: filters + character(*), intent(in) :: activation + type(conv2d_layer) :: res + res % window_size = window_size + res % filters = filters + call res % set_activation(activation) + end function conv2d_layer_cons + + + subroutine init(self, input_shape) + class(conv2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % width = input_shape(1) - self % window_size + 1 + self % height = input_shape(2) - self % window_size + 1 + self % channels = input_shape(3) + + allocate(self % output(self % width, self % height, self % filters)) + self % output = 0 + + allocate(self % kernel(self % window_size, self % window_size, & + self % channels, self % filters)) + self % kernel = 0 ! 
TODO 4-d randn + + allocate(self % biases(self % filters)) + self % biases = 0 + + end subroutine init + + + subroutine forward(self, input) + class(conv2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:,:) + print *, 'Warning: conv2d forward pass not implemented' + end subroutine forward + + + subroutine backward(self, input, gradient) + class(conv2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:,:) + real, intent(in) :: gradient(:,:,:) + print *, 'Warning: conv2d backward pass not implemented' + end subroutine backward + +end module nf_conv2d_layer diff --git a/src/nf_datasets_mnist.f90 b/src/nf_datasets_mnist.f90 new file mode 100644 index 00000000..574679fa --- /dev/null +++ b/src/nf_datasets_mnist.f90 @@ -0,0 +1,47 @@ +module nf_datasets_mnist + + !! Procedures to work with MNIST dataset, usable with data format + !! as provided in this repo and not the original data format (idx). + + implicit none + + private + public :: label_digits, load_mnist, print_image + + interface + + pure module function label_digits(labels) result(res) + !! Converts an array of individual MNIST labels (e.g. 3) + !! into a form that can be used to evaluate against dense layer output, + !! e.g. [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]. + implicit none + real, intent(in) :: labels(:) + !! Array of labels with single digit values in the range 0-9 + real :: res(10, size(labels)) + !! 10-element array of zeros and a single one indicating the digit + end function label_digits + + module subroutine load_mnist(training_images, training_labels, & + validation_images, validation_labels, & + testing_images, testing_labels) + !! Loads the MNIST dataset into arrays. 
+ implicit none + real, allocatable, intent(in out) :: training_images(:,:) + real, allocatable, intent(in out) :: training_labels(:) + real, allocatable, intent(in out) :: validation_images(:,:) + real, allocatable, intent(in out) :: validation_labels(:) + real, allocatable, intent(in out), optional :: testing_images(:,:) + real, allocatable, intent(in out), optional :: testing_labels(:) + end subroutine load_mnist + + module subroutine print_image(images, labels, n) + !! Print a single image and label to the screen. + implicit none + real, intent(in) :: images(:,:) + real, intent(in) :: labels(:) + integer, intent(in) :: n + end subroutine print_image + + end interface + +end module nf_datasets_mnist diff --git a/src/nf_datasets_mnist_submodule.f90 b/src/nf_datasets_mnist_submodule.f90 new file mode 100644 index 00000000..e1ff0dfa --- /dev/null +++ b/src/nf_datasets_mnist_submodule.f90 @@ -0,0 +1,132 @@ +submodule(nf_datasets_mnist) nf_datasets_mnist_submodule + + use nf_io, only: read_binary_file + + implicit none + + integer, parameter :: message_len = 128 + +contains + + subroutine download_and_uncompress() + character(*), parameter :: download_mechanism = 'curl -LO ' + character(*), parameter :: base_url='https://github.com/modern-fortran/neural-fortran/files/8498876/' + character(*), parameter :: download_filename = 'mnist.tar.gz' + character(*), parameter :: download_command = download_mechanism // base_url // download_filename + character(*), parameter :: uncompress_file = 'tar xvzf ' // download_filename + character(message_len) :: command_message + character(:), allocatable :: error_message + integer :: exit_status, command_status + + exit_status=0 + call execute_command_line(command=download_command, wait=.true., & + exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) + + if (any([exit_status, command_status] /= 0)) then + error_message = 'command "' // download_command // '" failed' + if (command_status /= 0) error_message = 
error_message // " with message " // trim(command_message) + error stop error_message + end if + + call execute_command_line(command=uncompress_file, wait=.true., & + exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) + + if (any([exit_status, command_status] /= 0)) then + error_message = 'command "' // uncompress_file // '" failed' + if (command_status /= 0) error_message = error_message // " with message " // trim(command_message) + error stop error_message + end if + + end subroutine download_and_uncompress + + + pure module function label_digits(labels) result(res) + real, intent(in) :: labels(:) + real :: res(10, size(labels)) + integer :: i + do i = 1, size(labels) + res(:,i) = digits(labels(i)) + end do + contains + pure function digits(x) + !! Returns an array of 10 reals, with zeros everywhere + !! and a one corresponding to the input digit. + !! + !! Example + !! + !! ``` + !! digits(0) = [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.] + !! digits(1) = [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.] + !! digits(6) = [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.] + !! ``` + real, intent(in) :: x + !! Input digit (0-9) + real :: digits(10) + !! 10-element array of zeros with a single one + !! 
indicating the input digit + digits = 0 + digits(int(x + 1)) = 1 + end function digits + end function label_digits + + + module subroutine load_mnist(training_images, training_labels, & + validation_images, validation_labels, & + testing_images, testing_labels) + real, allocatable, intent(in out) :: training_images(:,:) + real, allocatable, intent(in out) :: training_labels(:) + real, allocatable, intent(in out) :: validation_images(:,:) + real, allocatable, intent(in out) :: validation_labels(:) + real, allocatable, intent(in out), optional :: testing_images(:,:) + real, allocatable, intent(in out), optional :: testing_labels(:) + + integer, parameter :: dtype = 4, image_size = 784 + integer, parameter :: num_training_images = 50000 + integer, parameter :: num_validation_images = 10000 + integer, parameter :: num_testing_images = 10000 + logical :: file_exists + + ! Check if MNIST data is present and download it if not. + inquire(file='mnist_training_images.dat', exist=file_exists) + if (.not. file_exists) call download_and_uncompress() + + ! Load the training dataset (50000 samples) + call read_binary_file('mnist_training_images.dat', & + dtype, image_size, num_training_images, training_images) + call read_binary_file('mnist_training_labels.dat', & + dtype, num_training_images, training_labels) + + ! Load the validation dataset (10000 samples), for use while training + call read_binary_file('mnist_validation_images.dat', & + dtype, image_size, num_validation_images, validation_images) + call read_binary_file('mnist_validation_labels.dat', & + dtype, num_validation_images, validation_labels) + + ! Load the testing dataset (10000 samples), to test after training + if (present(testing_images) .and. 
present(testing_labels)) then + call read_binary_file('mnist_testing_images.dat', & + dtype, image_size, num_testing_images, testing_images) + call read_binary_file('mnist_testing_labels.dat', & + dtype, num_testing_images, testing_labels) + end if + + end subroutine load_mnist + + + module subroutine print_image(images, labels, n) + real, intent(in) :: images(:,:) + real, intent(in) :: labels(:) + integer, intent(in) :: n + real :: image(28, 28) + character :: char_image(28, 28) + integer i, j + image = reshape(images(:,n), [28, 28]) + char_image = '.' + where (image > 0) char_image = '#' + print *, labels(n) + do j = 1, 28 + print *, char_image(:,j) + end do + end subroutine print_image + +end submodule nf_datasets_mnist_submodule diff --git a/src/nf_dense_layer.f90 b/src/nf_dense_layer.f90 new file mode 100644 index 00000000..2c31898e --- /dev/null +++ b/src/nf_dense_layer.f90 @@ -0,0 +1,97 @@ +module nf_dense_layer + + !! This module provides the concrete dense layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: dense_layer + + type, extends(base_layer) :: dense_layer + + !! Concrete implementation of a dense (fully-connected) layer type + + integer :: input_size + integer :: output_size + + real, allocatable :: weights(:,:) + real, allocatable :: biases(:) + real, allocatable :: z(:) ! matmul(x, w) + b + real, allocatable :: output(:) ! activation(z) + real, allocatable :: gradient(:) ! matmul(w, db) + real, allocatable :: dw(:,:) ! weight gradients + real, allocatable :: db(:) ! bias gradients + + contains + + procedure :: backward + procedure :: forward + procedure :: init + procedure :: update + + end type dense_layer + + interface dense_layer + elemental module function dense_layer_cons(output_size, activation) & + result(res) + !! 
This function returns the `dense_layer` instance. + integer, intent(in) :: output_size + !! Number of neurons in this layer + character(*), intent(in) :: activation + !! Name of the activation function to use; + !! See nf_activation.f90 for available functions. + type(dense_layer) :: res + !! dense_layer instance + end function dense_layer_cons + end interface dense_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward gradient descent pass. + !! Only weight and bias gradients are updated in this subroutine, + !! while the weights and biases themselves are untouched. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + real, intent(in) :: gradient(:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Propagate forward the layer. + !! Calling this subroutine updates the values of a few data components + !! of `dense_layer` that are needed for the backward pass. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + end subroutine init + + module subroutine update(self, learning_rate) + !! Update the weights and biases. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: learning_rate + !! 
Learning rate (must be > 0) + end subroutine update + + end interface + +end module nf_dense_layer diff --git a/src/nf_dense_layer_submodule.f90 b/src/nf_dense_layer_submodule.f90 new file mode 100644 index 00000000..ef6aefcf --- /dev/null +++ b/src/nf_dense_layer_submodule.f90 @@ -0,0 +1,97 @@ +submodule(nf_dense_layer) nf_dense_layer_submodule + + use nf_base_layer, only: base_layer + use nf_random, only: randn + + implicit none + +contains + + elemental module function dense_layer_cons(output_size, activation) & + result(res) + integer, intent(in) :: output_size + character(*), intent(in) :: activation + type(dense_layer) :: res + res % output_size = output_size + call res % set_activation(activation) + end function dense_layer_cons + + + pure module subroutine backward(self, input, gradient) + class(dense_layer), intent(in out) :: self + real, intent(in) :: input(:) + real, intent(in) :: gradient(:) + real :: db(self % output_size) + real :: dw(self % input_size, self % output_size) + + db = gradient * self % activation_prime(self % z) + dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)])) + self % gradient = matmul(self % weights, db) + self % dw = self % dw + dw + self % db = self % db + db + + end subroutine backward + + + pure module subroutine forward(self, input) + class(dense_layer), intent(in out) :: self + real, intent(in) :: input(:) + + self % z = matmul(input, self % weights) + self % biases + self % output = self % activation(self % z) + + end subroutine forward + + + module subroutine init(self, input_shape) + class(dense_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % input_size = input_shape(1) + + ! Weights are a 2-d array of shape previous layer size + ! times this layer size. + allocate(self % weights(self % input_size, self % output_size)) + self % weights = randn(self % input_size, self % output_size) & + / self % input_size + + ! Broadcast weights to all other images, if any. 
+ call co_broadcast(self % weights, 1) + + allocate(self % biases(self % output_size)) + self % biases = 0 + + allocate(self % output(self % output_size)) + self % output = 0 + + allocate(self % z(self % output_size)) + self % z = 0 + + allocate(self % dw(self % input_size, self % output_size)) + self % dw = 0 + + allocate(self % db(self % output_size)) + self % db = 0 + + allocate(self % gradient(self % output_size)) + self % gradient = 0 + + end subroutine init + + + module subroutine update(self, learning_rate) + class(dense_layer), intent(in out) :: self + real, intent(in) :: learning_rate + + ! Sum weight and bias gradients across images, if any + call co_sum(self % dw) + call co_sum(self % db) + + self % weights = self % weights - learning_rate * self % dw + self % biases = self % biases - learning_rate * self % db + self % dw = 0 + self % db = 0 + + end subroutine update + +end submodule nf_dense_layer_submodule diff --git a/src/nf_input1d_layer.f90 b/src/nf_input1d_layer.f90 new file mode 100644 index 00000000..ae3be12e --- /dev/null +++ b/src/nf_input1d_layer.f90 @@ -0,0 +1,50 @@ +module nf_input1d_layer + + !! This module provides the `input1d_layer` type. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: input1d_layer + + type, extends(base_layer) :: input1d_layer + real, allocatable :: output(:) + contains + procedure :: init + procedure :: set + end type input1d_layer + + interface input1d_layer + pure module function input1d_layer_cons(output_size) result(res) + !! Create a new instance of the 1-d input layer. + !! Only used internally by the `layer % init` method. + integer, intent(in) :: output_size + !! Size of the input layer + type(input1d_layer) :: res + !! 1-d input layer instance + end function input1d_layer_cons + end interface input1d_layer + + interface + + module subroutine init(self, input_shape) + !! Only here to satisfy the language rules + !! about deferred methods of abstract types. + !! 
This method does nothing for this type and should not be called. + class(input1d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + !! Set values on this layer. + class(input1d_layer), intent(in out) :: self + !! Layer instance + real, intent(in) :: values(:) + !! Values to set + end subroutine set + + end interface + +end module nf_input1d_layer diff --git a/src/nf_input1d_layer_submodule.f90 b/src/nf_input1d_layer_submodule.f90 new file mode 100644 index 00000000..8fa8c49b --- /dev/null +++ b/src/nf_input1d_layer_submodule.f90 @@ -0,0 +1,23 @@ +submodule(nf_input1d_layer) nf_input1d_layer_submodule + implicit none +contains + + pure module function input1d_layer_cons(output_size) result(res) + integer, intent(in) :: output_size + type(input1d_layer) :: res + allocate(res % output(output_size)) + res % output = 0 + end function input1d_layer_cons + + module subroutine init(self, input_shape) + class(input1d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + class(input1d_layer), intent(in out) :: self + real, intent(in) :: values(:) + self % output = values + end subroutine set + +end submodule nf_input1d_layer_submodule diff --git a/src/nf_input3d_layer.f90 b/src/nf_input3d_layer.f90 new file mode 100644 index 00000000..511fa926 --- /dev/null +++ b/src/nf_input3d_layer.f90 @@ -0,0 +1,48 @@ +module nf_input3d_layer + + !! This module provides the `input3d_layer` type. + + use nf_base_layer, only: base_layer + implicit none + + private + public :: input3d_layer + + type, extends(base_layer) :: input3d_layer + real, allocatable :: output(:,:,:) + contains + procedure :: init + procedure :: set + end type input3d_layer + + interface input3d_layer + pure module function input3d_layer_cons(output_shape) result(res) + !! Create a new instance of the 3-d input layer. + !! 
Only used internally by the `layer % init` method. + integer, intent(in) :: output_shape(3) + !! Shape of the input layer + type(input3d_layer) :: res + !! 3-d input layer instance + end function input3d_layer_cons + end interface input3d_layer + + interface + + module subroutine init(self, input_shape) + !! Only here to satisfy the language rules + !! about deferred methods of abstract types. + !! This method does nothing for this type and should not be called. + class(input3d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + class(input3d_layer), intent(in out) :: self + !! Layer instance + real, intent(in) :: values(:,:,:) + !! Values to set + end subroutine set + + end interface + +end module nf_input3d_layer diff --git a/src/nf_input3d_layer_submodule.f90 b/src/nf_input3d_layer_submodule.f90 new file mode 100644 index 00000000..4cfe5126 --- /dev/null +++ b/src/nf_input3d_layer_submodule.f90 @@ -0,0 +1,23 @@ +submodule(nf_input3d_layer) nf_input3d_layer_submodule + implicit none +contains + + pure module function input3d_layer_cons(output_shape) result(res) + integer, intent(in) :: output_shape(3) + type(input3d_layer) :: res + allocate(res % output(output_shape(1), output_shape(2), output_shape(3))) + res % output = 0 + end function input3d_layer_cons + + module subroutine init(self, input_shape) + class(input3d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + class(input3d_layer), intent(in out) :: self + real, intent(in) :: values(:,:,:) + self % output = values + end subroutine set + +end submodule nf_input3d_layer_submodule diff --git a/src/nf_io.f90 b/src/nf_io.f90 new file mode 100644 index 00000000..c4f898f1 --- /dev/null +++ b/src/nf_io.f90 @@ -0,0 +1,42 @@ +module nf_io + + !! This module provides subroutines to read binary files using direct access. 
+ + implicit none + + private + public :: read_binary_file + + interface read_binary_file + + module subroutine read_binary_file_1d(filename, dtype, nrec, array) + !! Read a binary file into a 1-d real array using direct access. + implicit none + character(*), intent(in) :: filename + !! Path to the file to read + integer, intent(in) :: dtype + !! Number of bytes per element + integer, intent(in) :: nrec + !! Number of records to read + real, allocatable, intent(in out) :: array(:) + !! Array to store the data in + end subroutine read_binary_file_1d + + module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) + !! Read a binary file into a 2-d real array using direct access. + implicit none + character(*), intent(in) :: filename + !! Path to the file to read + integer, intent(in) :: dtype + !! Number of bytes per element + integer, intent(in) :: dsize + !! Number of elements in a record + integer, intent(in) :: nrec + !! Number of records to read + real, allocatable, intent(in out) :: array(:,:) + !! 
Array to store the data in + end subroutine read_binary_file_2d + + end interface read_binary_file + +end module nf_io diff --git a/src/mod_io_submodule.f90 b/src/nf_io_submodule.f90 similarity index 66% rename from src/mod_io_submodule.f90 rename to src/nf_io_submodule.f90 index 89a69f5f..2ac35ba3 100644 --- a/src/mod_io_submodule.f90 +++ b/src/nf_io_submodule.f90 @@ -1,4 +1,4 @@ -submodule(mod_io) mod_io_submodule +submodule(nf_io) nf_io_submodule implicit none @@ -7,11 +7,11 @@ contains module subroutine read_binary_file_1d(filename, dtype, nrec, array) - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, nrec - real(rk), allocatable, intent(in out) :: array(:) - integer(ik) :: fileunit - character(len=message_len) :: io_message + character(*), intent(in) :: filename + integer, intent(in) :: dtype, nrec + real, allocatable, intent(in out) :: array(:) + integer :: fileunit + character(message_len) :: io_message integer :: io_status io_status = 0 open(newunit=fileunit, file=filename, access='direct', action='read', & @@ -23,11 +23,11 @@ module subroutine read_binary_file_1d(filename, dtype, nrec, array) end subroutine read_binary_file_1d module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, dsize, nrec - real(rk), allocatable, intent(in out) :: array(:,:) - integer(ik) :: fileunit, i - character(len=message_len) :: io_message + character(*), intent(in) :: filename + integer, intent(in) :: dtype, dsize, nrec + real, allocatable, intent(in out) :: array(:,:) + integer :: fileunit, i + character(message_len) :: io_message integer :: io_status io_status = 0 open(newunit=fileunit, file=filename, access='direct', action='read', & @@ -40,4 +40,4 @@ module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) close(fileunit) end subroutine read_binary_file_2d -end submodule mod_io_submodule +end submodule nf_io_submodule diff --git 
a/src/nf_layer.f90 b/src/nf_layer.f90 new file mode 100644 index 00000000..3f40185d --- /dev/null +++ b/src/nf_layer.f90 @@ -0,0 +1,101 @@ +module nf_layer + + !! This module provides the `layer` type that is part of the public + !! user-facing API. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: layer + + type :: layer + + !! Main layer type. Use custom constructor functions from + !! nf_layer_constructors.f90 to create `layer` instances. + + class(base_layer), allocatable :: p + character(:), allocatable :: name + character(:), allocatable :: activation + integer, allocatable :: layer_shape(:) + integer, allocatable :: input_layer_shape(:) + logical :: initialized = .false. + + contains + + procedure :: backward + procedure :: forward + procedure :: get_output + procedure :: init + procedure :: print_info + procedure :: update + + end type layer + + interface + + pure module subroutine backward(self, previous, gradient) + !! Apply a backward pass on the layer. + !! This changes the internal state of the layer. + !! This is normally called internally by the `network % backward` + !! method. + class(layer), intent(in out) :: self + !! Layer instance + class(layer), intent(in) :: previous + !! Previous layer instance + real, intent(in) :: gradient(:) + !! Array of gradient values from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Apply a forward pass on the layer. + !! This changes the internal state of the layer. + !! This is normally called internally by the `network % forward` + !! method. + class(layer), intent(in out) :: self + !! Layer instance + class(layer), intent(in) :: input + !! Input layer instance + end subroutine forward + + pure module subroutine get_output(self, output) + !! Returns the output values (activations) from this layer. + class(layer), intent(in) :: self + !! Layer instance + real, allocatable, intent(out) :: output(:) + !! 
Output values from this layer + end subroutine get_output + + impure elemental module subroutine init(self, input) + !! Initialize the layer, using information from the input layer, + !! i.e. the layer that precedes this one. + class(layer), intent(in out) :: self + !! Layer instance + class(layer), intent(in) :: input + !! Input layer instance + end subroutine init + + impure elemental module subroutine print_info(self) + !! Prints a summary information about this layer to the screen. + !! This method is called by `network % print_info` for all layers + !! on that network. + class(layer), intent(in) :: self + !! Layer instance + end subroutine print_info + + impure elemental module subroutine update(self, learning_rate) + !! Update the weights and biases on the layer using the stored + !! gradients (from backward passes), and flush those same stored + !! gradients to zero. + !! This changes the state of the layer. + !! Typically used only internally from the `network % update` method. + class(layer), intent(in out) :: self + !! Layer instance + real, intent(in) :: learning_rate + !! Learning rate to use; must be > 0. + end subroutine update + + end interface + +end module nf_layer diff --git a/src/nf_layer_constructors.f90 b/src/nf_layer_constructors.f90 new file mode 100644 index 00000000..b20982c2 --- /dev/null +++ b/src/nf_layer_constructors.f90 @@ -0,0 +1,116 @@ +module nf_layer_constructors + + !! This module provides the functions to instantiate specific layers. + + use nf_layer, only: layer + + implicit none + + private + public :: conv2d, dense, input + + interface input + + pure module function input1d(layer_size) result(res) + !! 1-d input layer constructor. + !! + !! This layer is for inputting 1-d data to the network. + !! Currently, this layer must be followed by a dense layer. + !! An input layer must be the first layer in the network. + !! + !! This is a specific function that is available + !! under a generic name `input`. + !! + !! Example: + !! 
+ !! ``` + !! use nf, only: input, layer + !! type(layer) :: input_layer + !! input_layer = input(768) + !! ``` + integer, intent(in) :: layer_size + !! Size of the input layer + type(layer) :: res + !! Resulting layer instance + end function input1d + + pure module function input3d(layer_shape) result(res) + !! 3-d input layer constructor. + !! + !! This layer is for inputting 3-d data to the network. + !! Currently, this layer must be followed by a conv2d layer. + !! An input layer must be the first layer in the network. + !! + !! This is a specific function that is available + !! under a generic name `input`. + !! + !! Example: + !! + !! ``` + !! use nf, only: input, layer + !! type(layer) :: input_layer + !! input_layer = input([28, 28, 1]) + !! ``` + integer, intent(in) :: layer_shape(3) + !! Shape of the input layer + type(layer) :: res + !! Resulting layer instance + end function input3d + + end interface input + + interface + + pure module function dense(layer_size, activation) result(res) + !! Dense (fully-connected) layer constructor. + !! + !! This layer is a building block for dense, fully-connected networks, + !! or for an output layer of a convolutional network. + !! A dense layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only: dense, layer + !! type(layer) :: dense_layer + !! dense_layer = dense(10) + !! dense_layer = dense(10, activation='relu') + !! ``` + integer, intent(in) :: layer_size + !! The number of neurons in a dense layer + character(*), intent(in), optional :: activation + !! Activation function (default 'sigmoid') + type(layer) :: res + !! Resulting layer instance + end function dense + + pure module function conv2d(window_size, filters, activation) result(res) + !! 2-d convolutional layer constructor. + !! + !! This layer is for building 2-d convolutional networks. + !! Although the established convention is to call these layers 2-d, + !! 
the shape of the data is actually 3-d: image width, image height, + !! and the number of channels. + !! A conv2d layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only: conv2d, layer + !! type(layer) :: conv2d_layer + !! conv2d_layer = conv2d(window_size=3, filters=32) + !! conv2d_layer = conv2d(window_size=3, filters=32, activation='relu') + !! ``` + integer, intent(in) :: window_size + !! Width of the convolution window, commonly 3 or 5 + integer, intent(in) :: filters + !! Number of filters in the output of the layer + character(*), intent(in), optional :: activation + !! Activation function (default 'sigmoid') + type(layer) :: res + !! Resulting layer instance + end function conv2d + + end interface + +end module nf_layer_constructors diff --git a/src/nf_layer_constructors_submodule.f90 b/src/nf_layer_constructors_submodule.f90 new file mode 100644 index 00000000..e51713d2 --- /dev/null +++ b/src/nf_layer_constructors_submodule.f90 @@ -0,0 +1,75 @@ +submodule(nf_layer_constructors) nf_layer_constructors_submodule + + use nf_layer, only: layer + use nf_conv2d_layer, only: conv2d_layer + use nf_dense_layer, only: dense_layer + use nf_input1d_layer, only: input1d_layer + use nf_input3d_layer, only: input3d_layer + + implicit none + +contains + + pure module function input1d(layer_size) result(res) + integer, intent(in) :: layer_size + type(layer) :: res + res % name = 'input' + res % layer_shape = [layer_size] + res % input_layer_shape = [integer ::] + allocate(res % p, source=input1d_layer(layer_size)) + res % initialized = .true. + end function input1d + + + pure module function input3d(layer_shape) result(res) + integer, intent(in) :: layer_shape(3) + type(layer) :: res + res % name = 'input' + res % layer_shape = layer_shape + res % input_layer_shape = [integer ::] + allocate(res % p, source=input3d_layer(layer_shape)) + res % initialized = .true. 
+ end function input3d + + + pure module function dense(layer_size, activation) result(res) + integer, intent(in) :: layer_size + character(*), intent(in), optional :: activation + type(layer) :: res + + res % name = 'dense' + res % layer_shape = [layer_size] + + if (present(activation)) then + res % activation = activation + else + res % activation = 'sigmoid' + end if + + allocate(res % p, source=dense_layer(layer_size, res % activation)) + + end function dense + + + pure module function conv2d(window_size, filters, activation) result(res) + integer, intent(in) :: window_size + integer, intent(in) :: filters + character(*), intent(in), optional :: activation + type(layer) :: res + + res % name = 'conv2d' + + if (present(activation)) then + res % activation = activation + else + res % activation = 'sigmoid' + end if + + allocate( & + res % p, & + source=conv2d_layer(window_size, filters, res % activation) & + ) + + end function conv2d + +end submodule nf_layer_constructors_submodule diff --git a/src/nf_layer_submodule.f90 b/src/nf_layer_submodule.f90 new file mode 100644 index 00000000..ed03b188 --- /dev/null +++ b/src/nf_layer_submodule.f90 @@ -0,0 +1,116 @@ +submodule(nf_layer) nf_layer_submodule + + use nf_conv2d_layer, only: conv2d_layer + use nf_dense_layer, only: dense_layer + use nf_input1d_layer, only: input1d_layer + use nf_input3d_layer, only: input3d_layer + + implicit none + +contains + + pure module subroutine backward(self, previous, gradient) + class(layer), intent(in out) :: self + class(layer), intent(in) :: previous + real, intent(in) :: gradient(:) + + ! Backward pass currently implemented only for dense layers + select type(this_layer => self % p); type is(dense_layer) + + ! Previous layer is the input layer to this layer. + ! For a backward pass on a dense layer, we must accept either an input layer + ! or another dense layer as input. 
+ select type(prev_layer => previous % p) + + type is(input1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(dense_layer) + call this_layer % backward(prev_layer % output, gradient) + + end select + end select + + end subroutine backward + + + pure module subroutine forward(self, input) + class(layer), intent(in out) :: self + class(layer), intent(in) :: input + + select type(this_layer => self % p) + + ! Only dense layer is supported for now + type is(dense_layer) + + ! Input layers permitted: input1d, dense + select type(prev_layer => input % p) + type is(input1d_layer) + call this_layer % forward(prev_layer % output) + type is(dense_layer) + call this_layer % forward(prev_layer % output) + end select + + end select + + end subroutine forward + + + pure module subroutine get_output(self, output) + class(layer), intent(in) :: self + real, allocatable, intent(out) :: output(:) + + select type(this_layer => self % p) + + type is(input1d_layer) + allocate(output, source=this_layer % output) + type is(dense_layer) + allocate(output, source=this_layer % output) + + end select + + end subroutine get_output + + + impure elemental module subroutine init(self, input) + class(layer), intent(in out) :: self + class(layer), intent(in) :: input + + select type(this_layer => self % p); class default + call this_layer % init(input % layer_shape) + end select + + ! The shape of a conv2d layer is not known until we receive an input layer. + select type(this_layer => self % p); type is(conv2d_layer) + self % layer_shape = shape(this_layer % output) + end select + + self % input_layer_shape = input % layer_shape + self % initialized = .true. + + end subroutine init + + + impure elemental module subroutine print_info(self) + class(layer), intent(in) :: self + print '("Layer: ", a)', self % name + print '(60("-"))' + if (.not. 
self % name == 'input') & + print '("Input shape: ", *(i0, 1x))', self % input_layer_shape + print '("Output shape: ", *(i0, 1x))', self % layer_shape + if (.not. self % name == 'input') & + print '("Activation: ", a)', self % activation + print * + end subroutine print_info + + + impure elemental module subroutine update(self, learning_rate) + class(layer), intent(in out) :: self + real, intent(in) :: learning_rate + + select type(this_layer => self % p); type is(dense_layer) + call this_layer % update(learning_rate) + end select + + end subroutine update + +end submodule nf_layer_submodule diff --git a/src/nf_loss.f90 b/src/nf_loss.f90 new file mode 100644 index 00000000..62687fd8 --- /dev/null +++ b/src/nf_loss.f90 @@ -0,0 +1,42 @@ +module nf_loss + + !! This module will eventually provide a collection of loss functions and + !! their derivatives. For the time being it provides only the quadratic + !! function. + + implicit none + + private + public :: quadratic, quadratic_derivative + + interface + + pure module function quadratic(true, predicted) result(res) + !! Quadratic loss function: + !! + !! L = (predicted - true)**2 / 2 + !! + real, intent(in) :: true(:) + !! True values, i.e. labels from training datasets + real, intent(in) :: predicted(:) + !! Values predicted by the network + real :: res(size(true)) + !! Resulting loss values + end function quadratic + + pure module function quadratic_derivative(true, predicted) result(res) + !! First derivative of the quadratic loss function: + !! + !! L' = predicted - true + !! + real, intent(in) :: true(:) + !! True values, i.e. labels from training datasets + real, intent(in) :: predicted(:) + !! Values predicted by the network + real :: res(size(true)) + !! 
Resulting loss values + end function quadratic_derivative + + end interface + +end module nf_loss diff --git a/src/nf_loss_submodule.f90 b/src/nf_loss_submodule.f90 new file mode 100644 index 00000000..f8ad8a5e --- /dev/null +++ b/src/nf_loss_submodule.f90 @@ -0,0 +1,21 @@ +submodule(nf_loss) nf_loss_submodule + + implicit none + +contains + + pure module function quadratic(true, predicted) result(res) + real, intent(in) :: true(:) + real, intent(in) :: predicted(:) + real :: res(size(true)) + res = (predicted - true)**2 / 2 + end function quadratic + + pure module function quadratic_derivative(true, predicted) result(res) + real, intent(in) :: true(:) + real, intent(in) :: predicted(:) + real :: res(size(true)) + res = predicted - true + end function quadratic_derivative + +end submodule nf_loss_submodule diff --git a/src/nf_network.f90 b/src/nf_network.f90 new file mode 100644 index 00000000..9923e946 --- /dev/null +++ b/src/nf_network.f90 @@ -0,0 +1,113 @@ +module nf_network + + !! This module provides the network type to create new models. + + use nf_layer, only: layer + use nf_optimizers, only: sgd + + implicit none + + private + public :: network + + type :: network + type(layer), allocatable :: layers(:) + contains + procedure :: backward + procedure :: forward + procedure :: output + procedure :: print_info + procedure :: train + procedure :: update + end type network + + interface network + module function network_cons(layers) result(res) + !! Create a new `network` instance. + type(layer), intent(in) :: layers(:) + !! Input array of layer instances; + !! the first element must be an input layer. + type(network) :: res + !! An instance of the `network` type + end function network_cons + end interface network + + interface + + pure module subroutine backward(self, output) + !! Apply one backward pass through the network. + !! This changes the state of layers on the network. + !! Typically used only internally from the `train` method, + !! 
but can be invoked by the user when creating custom optimizers. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: output(:) + !! Output data + end subroutine backward + + pure module subroutine forward(self, input) + !! Apply a forward pass through the network. + !! This changes the state of layers on the network. + !! Typically used only internally from the `train` method, + !! but can be invoked by the user when creating custom optimizers. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: input(:) + !! Input data + end subroutine forward + + module function output(self, input) result(res) + !! Return the output of the network given the input array. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: input(:) + !! Input data + real, allocatable :: res(:) + !! Output of the network + end function output + + module subroutine print_info(self) + !! Prints a brief summary of the network and its layers to the screen. + class(network), intent(in) :: self + !! Network instance + end subroutine print_info + + module subroutine train(self, input_data, output_data, batch_size, & + epochs, optimizer) + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: input_data(:,:) + !! Input data to train on; + !! first dimension contains a single sample + !! and its size must match the size of the input layer. + real, intent(in) :: output_data(:,:) + !! Output data to train on; + !! first dimension contains a single sample + !! and its size must match the size of the output layer. + integer, intent(in) :: batch_size + !! Batch size to use. + !! Set to 1 for a pure stochastic gradient descent. + !! Set to `size(input_data, dim=2)` for a batch gradient descent. + integer, intent(in) :: epochs + !! Number of epochs to run + type(sgd), intent(in) :: optimizer + !! Optimizer instance; currently this is an `sgd` optimizer type + !! 
and it will be made to be a more general optimizer type. + end subroutine train + + module subroutine update(self, learning_rate) + !! Update the weights and biases on all layers using the stored + !! gradients (from backward passes) on those layers, and flush those + !! same stored gradients to zero. + !! This changes the state of layers on the network. + !! Typically used only internally from the `train` method, + !! but can be invoked by the user when creating custom optimizers. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: learning_rate + !! Learning rate to use; must be > 0. + end subroutine update + + end interface + +end module nf_network diff --git a/src/nf_network_submodule.f90 b/src/nf_network_submodule.f90 new file mode 100644 index 00000000..b8584eac --- /dev/null +++ b/src/nf_network_submodule.f90 @@ -0,0 +1,176 @@ +submodule(nf_network) nf_network_submodule + + use nf_dense_layer, only: dense_layer + use nf_input1d_layer, only: input1d_layer + use nf_layer, only: layer + use nf_loss, only: quadratic_derivative + use nf_optimizers, only: sgd + use nf_parallel, only: tile_indices + + implicit none + +contains + + module function network_cons(layers) result(res) + type(layer), intent(in) :: layers(:) + type(network) :: res + integer :: n + + ! Error handling + + ! There must be at least two layers + if (size(layers) < 2) & + error stop 'Error: A network must have at least 2 layers.' + + ! The first layer must be an input layer + if (.not. layers(1) % name == 'input') & + error stop 'Error: First layer in the network must be an input layer.' + + !TODO Ensure that the layers are in allowed sequence: + !TODO input1d -> dense + !TODO dense -> dense + !TODO input3d -> conv2d + !TODO conv2d -> conv2d + !TODO conv2d -> maxpool2d + !TODO maxpool2d -> conv2d + !TODO conv2d -> flatten + + res % layers = layers + + ! Loop over each layer in order and call the init methods. + ! 
This will allocate the data internal to each layer (e.g. weights, biases) + ! according to the size of the previous layer. + do n = 2, size(layers) + call res % layers(n) % init(res % layers(n - 1)) + end do + + end function network_cons + + + pure module subroutine backward(self, output) + class(network), intent(in out) :: self + real, intent(in) :: output(:) + real, allocatable :: gradient(:) + integer :: n, num_layers + + num_layers = size(self % layers) + + ! Iterate backward over layers, from the output layer + ! to the first non-input layer + do n = num_layers, 2, -1 + + if (n == num_layers) then + ! Output layer; apply the loss function + select type(this_layer => self % layers(n) % p) + type is(dense_layer) + gradient = quadratic_derivative(output, this_layer % output) + end select + else + ! Hidden layer; take the gradient from the next layer + select type(next_layer => self % layers(n + 1) % p) + type is(dense_layer) + gradient = next_layer % gradient + end select + end if + + call self % layers(n) % backward(self % layers(n - 1), gradient) + + end do + + end subroutine backward + + + pure module subroutine forward(self, input) + class(network), intent(in out) :: self + real, intent(in) :: input(:) + integer :: n + + ! 
Set the input array into the input layer + select type(input_layer => self % layers(1) % p); type is(input1d_layer) + call input_layer % set(input) + end select + + do n = 2, size(self % layers) + call self % layers(n) % forward(self % layers(n - 1)) + end do + + end subroutine forward + + + module function output(self, input) result(res) + class(network), intent(in out) :: self + real, intent(in) :: input(:) + real, allocatable :: res(:) + integer :: num_layers + + num_layers = size(self % layers) + + call self % forward(input) + + select type(output_layer => self % layers(num_layers) % p); type is(dense_layer) + res = output_layer % output + end select + + end function output + + + module subroutine print_info(self) + class(network), intent(in) :: self + call self % layers % print_info() + end subroutine print_info + + + module subroutine train(self, input_data, output_data, batch_size, & + epochs, optimizer) + class(network), intent(in out) :: self + real, intent(in) :: input_data(:,:) + real, intent(in) :: output_data(:,:) + integer, intent(in) :: batch_size + integer, intent(in) :: epochs + type(sgd), intent(in) :: optimizer + + real :: pos + integer :: dataset_size + integer :: batch_start, batch_end + integer :: i, j, n + integer :: istart, iend, indices(2) + + dataset_size = size(output_data, dim=2) + + epoch_loop: do n = 1, epochs + batch_loop: do i = 1, dataset_size / batch_size + + ! Pull a random mini-batch from the dataset + call random_number(pos) + batch_start = int(pos * (dataset_size - batch_size + 1)) + 1 + batch_end = batch_start + batch_size - 1 + + ! FIXME shuffle in a way that doesn't require co_broadcast + call co_broadcast(batch_start, 1) + call co_broadcast(batch_end, 1) + + ! 
Distribute the batch in nearly equal pieces to all images + indices = tile_indices(batch_size) + istart = indices(1) + batch_start - 1 + iend = indices(2) + batch_start - 1 + + do j = istart, iend ! sequential on purpose: each pass mutates the shared network state + call self % forward(input_data(:,j)) + call self % backward(output_data(:,j)) + end do + + call self % update(optimizer % learning_rate / batch_size) + + end do batch_loop + end do epoch_loop + + end subroutine train + + + module subroutine update(self, learning_rate) + class(network), intent(in out) :: self + real, intent(in) :: learning_rate + call self % layers % update(learning_rate) + end subroutine update + +end submodule nf_network_submodule diff --git a/src/nf_optimizers.f90 b/src/nf_optimizers.f90 new file mode 100644 index 00000000..2ba89904 --- /dev/null +++ b/src/nf_optimizers.f90 @@ -0,0 +1,17 @@ +module nf_optimizers + + !! This module provides optimizer types to pass to the `network % train` method. + + implicit none + + private + public :: sgd + + type :: sgd + !! Stochastic Gradient Descent optimizer + real :: learning_rate + real :: momentum = 0 !TODO + logical :: nesterov = .false. !TODO + end type sgd + +end module nf_optimizers diff --git a/src/mod_parallel.f90 b/src/nf_parallel.f90 similarity index 58% rename from src/mod_parallel.f90 rename to src/nf_parallel.f90 index 2c558cea..ac847cb4 100644 --- a/src/mod_parallel.f90 +++ b/src/nf_parallel.f90 @@ -1,6 +1,5 @@ -module mod_parallel +module nf_parallel - use mod_kinds, only: ik, rk implicit none private @@ -8,14 +7,14 @@ module mod_parallel interface - pure module function tile_indices(dims) + pure module function tile_indices(dims) result(res) !! Given input global array size, return start and end index !! of a parallel 1-d tile that correspond to this image. 
implicit none - integer(ik), intent(in) :: dims - integer(ik) :: tile_indices(2) + integer, intent(in) :: dims + integer :: res(2) end function tile_indices end interface -end module mod_parallel +end module nf_parallel diff --git a/src/nf_parallel_submodule.f90 b/src/nf_parallel_submodule.f90 new file mode 100644 index 00000000..6af1b57b --- /dev/null +++ b/src/nf_parallel_submodule.f90 @@ -0,0 +1,25 @@ +submodule(nf_parallel) nf_parallel_submodule + implicit none +contains + + pure module function tile_indices(dims) result(res) + integer, intent(in) :: dims + integer :: res(2) + integer :: offset, tile_size + + tile_size = dims / num_images() + + ! start and end indices assuming equal tile sizes + res(1) = (this_image() - 1) * tile_size + 1 + res(2) = res(1) + tile_size - 1 + + ! if we have any remainder, distribute it to the tiles at the end + offset = num_images() - mod(dims, num_images()) + if (this_image() > offset) then + res(1) = res(1) + this_image() - offset - 1 + res(2) = res(2) + this_image() - offset + end if + + end function tile_indices + +end submodule nf_parallel_submodule diff --git a/src/mod_random.f90 b/src/nf_random.f90 similarity index 59% rename from src/mod_random.f90 rename to src/nf_random.f90 index 6470d2c9..7c6544b6 100644 --- a/src/mod_random.f90 +++ b/src/nf_random.f90 @@ -1,10 +1,8 @@ -module mod_random +module nf_random !! Provides a random number generator with !! normal distribution, centered on zero. - use mod_kinds, only: ik, rk - implicit none private @@ -13,19 +11,21 @@ module mod_random interface randn module function randn1d(n) result(r) - !! Generates n random numbers with a normal distribution. + !! Generates n random numbers with a normal distribution, + !! using the Box-Muller method. implicit none - integer(ik), intent(in) :: n - real(rk) :: r(n) + integer, intent(in) :: n + real :: r(n) end function randn1d module function randn2d(m, n) result(r) - !! Generates m x n random numbers with a normal distribution. + !! 
Generates m x n random numbers with a normal distribution, + !! using the Box-Muller method. implicit none - integer(ik), intent(in) :: m, n - real(rk) :: r(m, n) + integer, intent(in) :: m, n + real :: r(m,n) end function randn2d end interface randn -end module mod_random +end module nf_random diff --git a/src/nf_random_submodule.f90 b/src/nf_random_submodule.f90 new file mode 100644 index 00000000..7ee8de6a --- /dev/null +++ b/src/nf_random_submodule.f90 @@ -0,0 +1,26 @@ +submodule(nf_random) nf_random_submodule + implicit none + + real, parameter :: pi = 4 * atan(1.d0) + +contains + + module function randn1d(n) result(x) + integer, intent(in) :: n + real :: x(n) + real :: u(n), v(n) + call random_number(u) + call random_number(v) + x = sqrt(-2 * log(1 - u)) * cos(2 * pi * v) ! u is in [0, 1), so 1 - u is in (0, 1] and log() never receives 0 + end function randn1d + + module function randn2d(m, n) result(x) + integer, intent(in) :: m, n + real :: x(m,n) + real :: u(m,n), v(m,n) + call random_number(u) + call random_number(v) + x = sqrt(-2 * log(1 - u)) * cos(2 * pi * v) ! u is in [0, 1), so 1 - u is in (0, 1] and log() never receives 0 + end function randn2d + +end submodule nf_random_submodule From 908f76b90728183bbf2065c05087dfbcde1eb5b1 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:26:51 -0400 Subject: [PATCH 02/13] Update example programs --- example/example_mnist.f90 | 55 --------------------------- example/example_mnist_epochs.f90 | 36 ------------------ example/example_save_and_load.f90 | 32 ---------------- example/example_simple.f90 | 14 ------- example/example_sine.f90 | 18 --------- example/mnist.f90 | 63 +++++++++++++++++++++++++++++++ example/simple.f90 | 34 +++++++++++++++++ example/sine.f90 | 43 +++++++++++++++++++++ 8 files changed, 140 insertions(+), 155 deletions(-) delete mode 100644 example/example_mnist.f90 delete mode 100644 example/example_mnist_epochs.f90 delete mode 100644 example/example_save_and_load.f90 delete mode 100644 example/example_simple.f90 delete mode 100644 example/example_sine.f90 create mode 100644 example/mnist.f90 create mode 100644 
example/simple.f90 create mode 100644 example/sine.f90 diff --git a/example/example_mnist.f90 b/example/example_mnist.f90 deleted file mode 100644 index 1192e07e..00000000 --- a/example/example_mnist.f90 +++ /dev/null @@ -1,55 +0,0 @@ -program example_mnist - - ! A training example with the MNIST dataset. - ! Uses stochastic gradient descent and mini-batch size of 100. - ! Can be run in serial or parallel mode without modifications. - - use mod_kinds, only: ik, rk - use mod_mnist, only: label_digits, load_mnist - use mod_network, only: network_type - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - real(rk), allocatable :: input(:,:), output(:,:) - - type(network_type) :: net - - integer(ik) :: i, n, num_epochs - integer(ik) :: batch_size, batch_start, batch_end - real(rk) :: pos - - call load_mnist(tr_images, tr_labels, te_images, te_labels) - - net = network_type([784, 30, 10]) - - batch_size = 100 - num_epochs = 10 - - if (this_image() == 1) print '(a,f5.2,a)', 'Initial accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - epochs: do n = 1, num_epochs - batches: do i = 1, size(tr_labels) / batch_size - - ! pull a random mini-batch from the dataset - call random_number(pos) - batch_start = int(pos * (size(tr_labels) - batch_size + 1)) - batch_end = batch_start + batch_size - 1 - - ! prepare mini-batch - input = tr_images(:,batch_start:batch_end) - output = label_digits(tr_labels(batch_start:batch_end)) - - ! 
train the network on the mini-batch - call net % train(input, output, eta=3._rk) - - end do batches - - if (this_image() == 1) print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - end do epochs - -end program example_mnist diff --git a/example/example_mnist_epochs.f90 b/example/example_mnist_epochs.f90 deleted file mode 100644 index 08ba04a8..00000000 --- a/example/example_mnist_epochs.f90 +++ /dev/null @@ -1,36 +0,0 @@ -program example_mnist - - ! A training example with the MNIST dataset. - ! Uses stochastic gradient descent and mini-batch size of 100. - ! Can be run in serial or parallel mode without modifications. - - use mod_kinds, only: ik, rk - use mod_mnist, only: label_digits, load_mnist - use mod_network, only: network_type - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - - type(network_type) :: net - - integer(ik) :: i, n, num_epochs - integer(ik) :: batch_size - - call load_mnist(tr_images, tr_labels, te_images, te_labels) - - net = network_type([size(tr_images, dim=1), 10, size(label_digits(tr_labels), dim=1)]) - - batch_size = 100 - num_epochs = 10 - - if (this_image() == 1) print '(a,f5.2,a)', 'Initial accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - call net % train(tr_images, label_digits(tr_labels), 3._rk, num_epochs, batch_size) - - if (this_image() == 1) print '(a,f5.2,a)', 'Epochs done, Accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - -end program example_mnist diff --git a/example/example_save_and_load.f90 b/example/example_save_and_load.f90 deleted file mode 100644 index f9ef7f1c..00000000 --- a/example/example_save_and_load.f90 +++ /dev/null @@ -1,32 +0,0 @@ -program example_save_and_load - - use mod_network, only: network_type - implicit none - - type(network_type) :: net1, net2 - real, allocatable :: input(:), 
output(:) - integer :: i - - net1 = network_type([3, 5, 2]) - - input = [0.2, 0.4, 0.6] - output = [0.123456, 0.246802] - - ! train network 1 - do i = 1, 500 - call net1 % train(input, output, eta=1.0) - end do - - ! save network 1 to file - call net1 % save('my_simple_net.txt') - - ! load network 2 from file - !net2 = network_type([3, 5, 2]) - call net2 % load('my_simple_net.txt') - call net2 % set_activation('sigmoid') - - print *, 'Network 1 output: ', net1 % output(input) - print *, 'Network 2 output: ', net2 % output(input) - print *, 'Outputs match: ', all(net1 % output(input) == net2 % output(input)) - -end program example_save_and_load diff --git a/example/example_simple.f90 b/example/example_simple.f90 deleted file mode 100644 index 6fe3ba01..00000000 --- a/example/example_simple.f90 +++ /dev/null @@ -1,14 +0,0 @@ -program example_simple - use mod_network, only: network_type - implicit none - type(network_type) :: net - real, allocatable :: input(:), output(:) - integer :: i - net = network_type([3, 5, 2]) - input = [0.2, 0.4, 0.6] - output = [0.123456, 0.246802] - do i = 1, 500 - call net % train(input, output, eta=1.0) - print *, 'Iteration: ', i, 'Output:', net % output(input) - end do -end program example_simple diff --git a/example/example_sine.f90 b/example/example_sine.f90 deleted file mode 100644 index 1b5931c9..00000000 --- a/example/example_sine.f90 +++ /dev/null @@ -1,18 +0,0 @@ -program example_sine - use mod_kinds, only: ik, rk - use mod_network, only: network_type - implicit none - type(network_type) :: net - real(rk) :: cumloss, x, y - real(rk), parameter :: pi = 4 * atan(1._rk) - integer(ik) :: i - net = network_type([1, 5, 1]) - cumloss = 0 - do i = 1, 1000000 - call random_number(x) - y = (sin(x * 2 * pi) + 1) * 0.5 - call net % train([x], [y], eta=10._rk) - cumloss = cumloss + net % loss([x], [y]) - print *, i, cumloss / i - end do -end program example_sine diff --git a/example/mnist.f90 b/example/mnist.f90 new file mode 100644 index 
00000000..c711f993 --- /dev/null +++ b/example/mnist.f90 @@ -0,0 +1,63 @@ +program mnist + use nf, only: dense, input, network + use nf_datasets_mnist, only: label_digits, load_mnist + use nf_optimizers, only: sgd + + implicit none + + type(network) :: net + real, allocatable :: training_images(:,:), training_labels(:) + real, allocatable :: validation_images(:,:), validation_labels(:) + integer :: n, num_epochs + + call load_mnist(training_images, training_labels, & + validation_images, validation_labels) + + print '("MNIST")' + print '(60("="))' + + net = network([ & + input(784), & + dense(30), & + dense(10) & + ]) + num_epochs = 10 + + call net % print_info() + + if (this_image() == 1) & + print '(a,f5.2,a)', 'Initial accuracy: ', accuracy( & + net, validation_images, label_digits(validation_labels)) * 100, ' %' + + epochs: do n = 1, num_epochs + + call net % train( & + training_images, & + label_digits(training_labels), & + batch_size=100, & + epochs=1, & + optimizer=sgd(learning_rate=3.) 
& + ) + + if (this_image() == 1) & + print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( & + net, validation_images, label_digits(validation_labels)) * 100, ' %' + + end do epochs + +contains + + real function accuracy(net, x, y) + type(network), intent(in out) :: net + real, intent(in) :: x(:,:), y(:,:) + integer :: i, good + good = 0 + do i = 1, size(x, dim=2) + if (all(maxloc(net % output(x(:,i))) == maxloc(y(:,i)))) then + good = good + 1 + end if + end do + accuracy = real(good) / size(x, dim=2) + end function accuracy + +end program mnist diff --git a/example/simple.f90 b/example/simple.f90 new file mode 100644 index 00000000..44c87051 --- /dev/null +++ b/example/simple.f90 @@ -0,0 +1,34 @@ +program simple + use nf, only: dense, input, network + implicit none + type(network) :: net + real, allocatable :: x(:), y(:) + integer, parameter :: num_iterations = 500 + integer :: n + + print '("Simple")' + print '(60("="))' + + net = network([ & + input(3), & + dense(5), & + dense(2) & + ]) + + call net % print_info() + + x = [0.2, 0.4, 0.6] + y = [0.123456, 0.246802] + + do n = 0, num_iterations + + call net % forward(x) + call net % backward(y) + call net % update(1.) + + if (mod(n, 50) == 0) & + print '(i4,2(3x,f8.6))', n, net % output(x) + + end do + +end program simple diff --git a/example/sine.f90 b/example/sine.f90 new file mode 100644 index 00000000..fd7483d9 --- /dev/null +++ b/example/sine.f90 @@ -0,0 +1,43 @@ +program sine + use nf, only: dense, input, network + implicit none + type(network) :: net + real :: x(1), y(1) + real, parameter :: pi = 4 * atan(1.) 
+ integer, parameter :: num_iterations = 100000 + integer, parameter :: test_size = 30 + real :: xtest(test_size), ytest(test_size), ypred(test_size) + integer :: i, n + + print '("Sine training")' + print '(60("="))' + + net = network([ & + input(1), & + dense(5), & + dense(1) & + ]) + + call net % print_info() + + xtest = [((i - 1) * 2 * pi / test_size, i = 1, test_size)] + ytest = (sin(xtest) + 1) / 2 + + do n = 0, num_iterations + + call random_number(x) + x = x * 2 * pi + y = (sin(x) + 1) / 2 + + call net % forward(x) + call net % backward(y) + call net % update(1.) + + if (mod(n, 10000) == 0) then + ypred = [(net % output([xtest(i)]), i = 1, test_size)] + print '(i0,1x,f9.6)', n, sum((ypred - ytest)**2) / size(ypred) + end if + + end do + +end program sine From 5acdc522797801fe4ac97d4001ff437569314fa6 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:27:54 -0400 Subject: [PATCH 03/13] Update test programs --- test/test_dense_layer.f90 | 51 ++++++++++++++++++++++ test/test_input1d_layer.f90 | 51 ++++++++++++++++++++++ test/test_mnist.f90 | 24 ---------- test/test_network_save.f90 | 32 -------------- test/test_network_sync.f90 | 9 ---- test/test_set_activation_function.f90 | 63 --------------------------- 6 files changed, 102 insertions(+), 128 deletions(-) create mode 100644 test/test_dense_layer.f90 create mode 100644 test/test_input1d_layer.f90 delete mode 100644 test/test_mnist.f90 delete mode 100644 test/test_network_save.f90 delete mode 100644 test/test_network_sync.f90 delete mode 100644 test/test_set_activation_function.f90 diff --git a/test/test_dense_layer.f90 b/test/test_dense_layer.f90 new file mode 100644 index 00000000..ca45d13f --- /dev/null +++ b/test/test_dense_layer.f90 @@ -0,0 +1,51 @@ +program test_dense_layer + use nf, only: dense, layer + implicit none + type(layer) :: layer1, layer2 + logical :: ok = .true. + + layer1 = dense(10) + + if (.not. layer1 % name == 'dense') then + ok = .false. 
+ print '(a)', 'dense layer has its name set correctly.. failed' + end if + + if (.not. all(layer1 % layer_shape == [10])) then + ok = .false. + print '(a)', 'dense layer is created with requested size.. failed' + end if + + if (layer1 % initialized) then + ok = .false. + print '(a)', 'dense layer should not be marked as initialized yet.. failed' + end if + + if (.not. layer1 % activation == 'sigmoid') then + ok = .false. + print '(a)', 'dense layer is defaults to sigmoid activation.. failed' + end if + + layer1 = dense(10, activation='relu') + + if (.not. layer1 % activation == 'relu') then + ok = .false. + print '(a)', 'dense layer is created with the specified activation.. failed' + end if + + layer2 = dense(20) + call layer2 % init(layer1) + + if (.not. layer2 % initialized) then + ok = .false. + print '(a)', 'dense layer should now be marked as initialized.. failed' + end if + + if (.not. all(layer2 % input_layer_shape == [10])) then + ok = .false. + print '(a)', 'dense layer should have a correct input layer shape.. failed' + end if + + if (ok) print '(a)', 'test_dense_layer: All tests passed.' + +end program test_dense_layer diff --git a/test/test_input1d_layer.f90 b/test/test_input1d_layer.f90 new file mode 100644 index 00000000..96a4a22d --- /dev/null +++ b/test/test_input1d_layer.f90 @@ -0,0 +1,51 @@ +program test_input1d_layer + use nf, only: input, layer + use nf_input1d_layer, only: input1d_layer + implicit none + type(layer) :: test_layer + real, allocatable :: output(:) + logical :: ok = .true. + + test_layer = input(3) + + if (.not. test_layer % name == 'input') then + ok = .false. + print '(a)', 'input1d layer has its name set correctly.. failed' + end if + + if (.not. test_layer % initialized) then + ok = .false. + print '(a)', 'input1d layer should be marked as initialized.. failed' + end if + + if (.not. all(test_layer % layer_shape == [3])) then + ok = .false. + print '(a)', 'input1d layer is created with requested size.. 
failed' + end if + + if (.not. size(test_layer % input_layer_shape) == 0) then + ok = .false. + print '(a)', 'input1d layer has no input layer shape.. failed' + end if + + call test_layer % get_output(output) + + if (.not. all(output == 0)) then + ok = .false. + print '(a)', 'input1d layer values are all initialized to 0.. failed' + end if + + select type(input_layer => test_layer % p); type is(input1d_layer) + call input_layer % set([1., 2., 3.]) + end select + + call test_layer % get_output(output) + + if (.not. all(output == [1., 2., 3.])) then + ok = .false. + print '(a)', 'input1d layer can have its values set.. failed' + end if + + if (ok) print '(a)', 'test_input1d_layer: All tests passed.' + +end program test_input1d_layer diff --git a/test/test_mnist.f90 b/test/test_mnist.f90 deleted file mode 100644 index 183de0fb..00000000 --- a/test/test_mnist.f90 +++ /dev/null @@ -1,24 +0,0 @@ -program test_mnist - - use mod_mnist, only: load_mnist - use mod_kinds, only: ik, rk - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - real(rk), allocatable :: va_images(:,:), va_labels(:) - - print *, 'Reading MNIST data..' 
- call load_mnist(tr_images, tr_labels, te_images, te_labels, va_images, va_labels) - print *, 'Training data:' - print *, shape(tr_images), minval(tr_images), maxval(tr_images), sum(tr_images) / size(tr_images) - print *, shape(tr_labels), sum(tr_labels) / size(tr_labels) - print *, 'Testing data:' - print *, shape(te_images), minval(te_images), maxval(te_images), sum(te_images) / size(te_images) - print *, shape(te_labels), sum(te_labels) / size(te_labels) - print *, 'Validation data:' - print *, shape(va_images), minval(va_images), maxval(va_images), sum(va_images) / size(va_images) - print *, shape(va_labels), sum(va_labels) / size(va_labels) - -end program test_mnist diff --git a/test/test_network_save.f90 b/test/test_network_save.f90 deleted file mode 100644 index 00aea0d2..00000000 --- a/test/test_network_save.f90 +++ /dev/null @@ -1,32 +0,0 @@ -program test_network_save - use mod_network, only: network_type - implicit none - type(network_type) :: net1, net2 - integer :: n - print *, 'Initializing 2 networks with random weights and biases' - net1 = network_type([768, 30, 10]) - net2 = network_type([768, 30, 10]) - - print *, 'Save network 1 into file' - call net1 % save('test_network.dat') - call net2 % load('test_network.dat') - print *, 'Load network 2 from file' - do n = 1, size(net1 % layers) - print *, 'Layer ', n, ', weights equal: ',& - all(net1 % layers(n) % w == net2 % layers(n) % w),& - ', biases equal:', all(net1 % layers(n) % b == net2 % layers(n) % b) - end do - print *, '' - - print *, 'Setting different activation functions for each layer of network 1' - call net1 % set_activation([character(len=10) :: 'sigmoid', 'tanh', 'gaussian']) - print *, 'Save network 1 into file' - call net1 % save('test_network.dat') - call net2 % load('test_network.dat') - print *, 'Load network 2 from file' - do n = 1, size(net1 % layers) - print *, 'Layer ', n, ', activation functions equal:',& - associated(net1 % layers(n) % activation, net2 % layers(n) % 
activation),& - '(network 1: ', net1 % layers(n) % activation_str, ', network 2: ', net2 % layers(n) % activation_str,')' - end do -end program test_network_save diff --git a/test/test_network_sync.f90 b/test/test_network_sync.f90 deleted file mode 100644 index 9b905b95..00000000 --- a/test/test_network_sync.f90 +++ /dev/null @@ -1,9 +0,0 @@ -program test_network_sync - use mod_network, only: network_type - implicit none - type(network_type) :: net - - net = network_type([5, 3, 2]) - print *, this_image(), net % layers(1) % w - -end program test_network_sync diff --git a/test/test_set_activation_function.f90 b/test/test_set_activation_function.f90 deleted file mode 100644 index 479055f0..00000000 --- a/test/test_set_activation_function.f90 +++ /dev/null @@ -1,63 +0,0 @@ -program test_set_activation_function - - ! This program will test whether per-network and per-layer - ! setting of activation functions works as expected. - ! First we create an array of random variables. - ! Then we set different activation functions to different - ! layers in the network. - ! Finally, we test whether each function produces same - ! values as the activation functions set in the layers. - - use mod_activation - use mod_network, only: network_type - use mod_random, only: randn - - implicit none - type(network_type) :: net - real, allocatable :: x(:) - integer :: n - logical, allocatable :: tests(:) - - tests = [logical ::] - - x = randn(100) - - ! the network will be created with - ! sigmoid activation functions for all layers - net = network_type([1, 1, 1, 1, 1]) - - do n = 1, size(net % layers) - tests = [tests, all(sigmoid(x) == net % layers(n) % activation(x))] - tests = [tests, all(sigmoid_prime(x) == net % layers(n) % activation_prime(x))] - end do - - ! 
now set the various functions for other layers - call net % layers(2) % set_activation('gaussian') - call net % layers(3) % set_activation('step') - call net % layers(4) % set_activation('tanh') - call net % layers(5) % set_activation('relu') - - tests = [tests, all(sigmoid(x) == net % layers(1) % activation(x))] - tests = [tests, all(sigmoid_prime(x) == net % layers(1) % activation_prime(x))] - - tests = [tests, all(gaussian(x) == net % layers(2) % activation(x))] - tests = [tests, all(gaussian_prime(x) == net % layers(2) % activation_prime(x))] - - tests = [tests, all(step(x) == net % layers(3) % activation(x))] - tests = [tests, all(step_prime(x) == net % layers(3) % activation_prime(x))] - - tests = [tests, all(tanhf(x) == net % layers(4) % activation(x))] - tests = [tests, all(tanh_prime(x) == net % layers(4) % activation_prime(x))] - - tests = [tests, all(relu(x) == net % layers(5) % activation(x))] - tests = [tests, all(relu_prime(x) == net % layers(5) % activation_prime(x))] - - print *, tests - - if (all(tests)) then - print *, 'All tests passed.' - else - error stop 'some tests failed.' - end if - -end program test_set_activation_function From c9cf69b313379e32fad69efeb7588a87478707aa Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:42:21 -0400 Subject: [PATCH 04/13] Emit test errors to stderr; exit with status 1 on failure --- test/test_dense_layer.f90 | 22 ++++++++++++++-------- test/test_input1d_layer.f90 | 20 +++++++++++++------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/test/test_dense_layer.f90 b/test/test_dense_layer.f90 index ca45d13f..5bc68aef 100644 --- a/test/test_dense_layer.f90 +++ b/test/test_dense_layer.f90 @@ -1,4 +1,5 @@ program test_dense_layer + use iso_fortran_env, only: stderr => error_unit use nf, only: dense, layer implicit none type(layer) :: layer1, layer2 @@ -8,29 +9,29 @@ program test_dense_layer if (.not. layer1 % name == 'dense') then ok = .false. 
- print '(a)', 'dense layer has its name set correctly.. failed' + write(stderr, '(a)') 'dense layer has its name set correctly.. failed' end if if (.not. all(layer1 % layer_shape == [10])) then ok = .false. - print '(a)', 'dense layer is created with requested size.. failed' + write(stderr, '(a)') 'dense layer is created with requested size.. failed' end if if (layer1 % initialized) then ok = .false. - print '(a)', 'dense layer should not be marked as initialized yet.. failed' + write(stderr, '(a)') 'dense layer should not be marked as initialized yet.. failed' end if if (.not. layer1 % activation == 'sigmoid') then ok = .false. - print '(a)', 'dense layer is defaults to sigmoid activation.. failed' + write(stderr, '(a)') 'dense layer defaults to sigmoid activation.. failed' end if layer1 = dense(10, activation='relu') if (.not. layer1 % activation == 'relu') then ok = .false. - print '(a)', 'dense layer is created with the specified activation.. failed' + write(stderr, '(a)') 'dense layer is created with the specified activation.. failed' end if layer2 = dense(20) @@ -38,14 +39,19 @@ program test_dense_layer if (.not. layer2 % initialized) then ok = .false. - print '(a)', 'dense layer should now be marked as initialized.. failed' + write(stderr, '(a)') 'dense layer should now be marked as initialized.. failed' end if if (.not. all(layer2 % input_layer_shape == [10])) then ok = .false. - print '(a)', 'dense layer should have a correct input layer shape.. failed' + write(stderr, '(a)') 'dense layer should have a correct input layer shape.. failed' end if - if (ok) print '(a)', 'test_dense_layer: All tests passed.' + if (ok) then + print '(a)', 'test_dense_layer: All tests passed.' + else + write(stderr, '(a)') 'test_dense_layer: One or more tests failed.' 
+ stop 1 + end if end program test_dense_layer diff --git a/test/test_input1d_layer.f90 b/test/test_input1d_layer.f90 index 96a4a22d..4404348d 100644 --- a/test/test_input1d_layer.f90 +++ b/test/test_input1d_layer.f90 @@ -1,4 +1,5 @@ program test_input1d_layer + use iso_fortran_env, only: stderr => error_unit use nf, only: input, layer use nf_input1d_layer, only: input1d_layer implicit none @@ -10,29 +11,29 @@ program test_input1d_layer if (.not. test_layer % name == 'input') then ok = .false. - print '(a)', 'input1d layer has its name set correctly.. failed' + write(stderr, '(a)') 'input1d layer has its name set correctly.. failed' end if if (.not. test_layer % initialized) then ok = .false. - print '(a)', 'input1d layer should be marked as initialized.. failed' + write(stderr, '(a)') 'input1d layer should be marked as initialized.. failed' end if if (.not. all(test_layer % layer_shape == [3])) then ok = .false. - print '(a)', 'input1d layer is created with requested size.. failed' + write(stderr, '(a)') 'input1d layer is created with requested size.. failed' end if if (.not. size(test_layer % input_layer_shape) == 0) then ok = .false. - print '(a)', 'input1d layer has no input layer shape.. failed' + write(stderr, '(a)') 'input1d layer has no input layer shape.. failed' end if call test_layer % get_output(output) if (.not. all(output == 0)) then ok = .false. - print '(a)', 'input1d layer values are all initialized to 0.. failed' + write(stderr, '(a)') 'input1d layer values are all initialized to 0.. failed' end if select type(input_layer => test_layer % p); type is(input1d_layer) @@ -43,9 +44,14 @@ program test_input1d_layer if (.not. all(output == [1., 2., 3.])) then ok = .false. - print '(a)', 'input1d layer can have its values set.. failed' + write(stderr, '(a)') 'input1d layer can have its values set.. failed' end if - if (ok) print '(a)', 'test_input1d_layer: All tests passed.' + if (ok) then + print '(a)', 'test_input1d_layer: All tests passed.' 
+ else + write(stderr, '(a)') 'test_input1d_layer: One or more tests failed.' + stop 1 + end if end program test_input1d_layer From eb06b9690a5e47ed0eea249ad1ae846b2607fcb5 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:42:35 -0400 Subject: [PATCH 05/13] Update CMake build for refactor --- CMakeLists.txt | 74 +++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 412d245f..c9b3d150 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,24 +13,6 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "release") endif() -# handle integer size -if(INT) - message(STATUS "Configuring build for ${INT}-bit integers") - add_definitions(-DINT${INT}) -else() - message(STATUS "Configuring build for 32-bit integers") - add_definitions(-DINT32) -endif() - -# handle real size -if(REAL) - message(STATUS "Configuring build for ${REAL}-bit reals") - add_definitions(-DREAL${REAL}) -else() - message(STATUS "Configuring build for 32-bit reals") - add_definitions(-DREAL32) -endif() - if(SERIAL) message(STATUS "Configuring build for serial execution") else() @@ -51,7 +33,6 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES GNU) message(STATUS "Configuring build to use BLAS from ${BLAS}") endif() - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp") set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -C -fbacktrace") set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") endif() @@ -64,7 +45,7 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES Intel) set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -coarray=single") endif() - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fpp -assume byterecl,realloc_lhs -heap-arrays") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -assume byterecl") set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -C -traceback") set(CMAKE_Fortran_FLAGS_RELEASE "-O3") @@ -83,21 +64,34 @@ endif() # library to archive (libneural.a) add_library(neural - src/mod_activation.f90 - 
src/mod_activation_submodule.f90 - src/mod_io.f90 - src/mod_io_submodule.f90 - src/mod_kinds.f90 - src/mod_layer.f90 - src/mod_layer_submodule.f90 - src/mod_mnist.f90 - src/mod_mnist_submodule.f90 - src/mod_network.f90 - src/mod_network_submodule.f90 - src/mod_parallel.f90 - src/mod_parallel_submodule.f90 - src/mod_random.f90 - src/mod_random_submodule.f90 + src/nf_activation.f90 + src/nf_base_layer.f90 + src/nf_base_layer_submodule.f90 + src/nf_conv2d_layer.f90 + src/nf_datasets_mnist.f90 + src/nf_datasets_mnist_submodule.f90 + src/nf_dense_layer.f90 + src/nf_dense_layer_submodule.f90 + src/nf.f90 + src/nf_input1d_layer.f90 + src/nf_input1d_layer_submodule.f90 + src/nf_input3d_layer.f90 + src/nf_input3d_layer_submodule.f90 + src/nf_io.f90 + src/nf_io_submodule.f90 + src/nf_layer_constructors.f90 + src/nf_layer_constructors_submodule.f90 + src/nf_layer.f90 + src/nf_layer_submodule.f90 + src/nf_loss.f90 + src/nf_loss_submodule.f90 + src/nf_network.f90 + src/nf_network_submodule.f90 + src/nf_optimizers.f90 + src/nf_parallel.f90 + src/nf_parallel_submodule.f90 + src/nf_random.f90 + src/nf_random_submodule.f90 ) # Remove leading or trailing whitespace @@ -105,14 +99,14 @@ string(REGEX REPLACE "^ | $" "" LIBS "${LIBS}") # tests enable_testing() -foreach(execid mnist network_save network_sync set_activation_function) +foreach(execid dense_layer input1d_layer) add_executable(test_${execid} test/test_${execid}.f90) target_link_libraries(test_${execid} neural ${LIBS}) add_test(test_${execid} bin/test_${execid}) endforeach() -foreach(execid mnist mnist_epochs save_and_load simple sine) - add_executable(example_${execid} example/example_${execid}.f90) - target_link_libraries(example_${execid} neural ${LIBS}) - add_test(example_${execid} bin/example_${execid}) +foreach(execid mnist simple sine) + add_executable(${execid} example/${execid}.f90) + target_link_libraries(${execid} neural ${LIBS}) + #add_test(example_${execid} bin/example_${execid}) endforeach() From 
d0067553aba5facf4c0bd84d596cf6708d69c625 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:42:49 -0400 Subject: [PATCH 06/13] Bump version --- fpm.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fpm.toml b/fpm.toml index a3caa168..c5b32417 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,5 +1,5 @@ name = "neural-fortran" -version = "0.2.0" +version = "0.3.0" license = "MIT" author = "Milan Curcic" maintainer = "milancurcic@hey.com" From 65a46c543e626d71bfb73e169d67bf8674764cdc Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:43:45 -0400 Subject: [PATCH 07/13] Remove travis CI stuff; not used anymore --- .travis.yml | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 963a464c..00000000 --- a/.travis.yml +++ /dev/null @@ -1,28 +0,0 @@ -language: c -group: travis_latest -dist: xenial - -git: - depth: 3 - quiet: true - -addons: - apt: - sources: ubuntu-toolchain-r-test - packages: gfortran-8 - -matrix: - include: - - os: linux - env: FC=gfortran-8 - -before_install: -- cd data/mnist && tar xzvf mnist.tar.gz && cd - - -install: -- mkdir build -- cd build -- cmake .. -DSERIAL=1 -- make - -script: ctest --output-on-failure From 52f3472d24ec78e46f89e79759a25a45f45d804e Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 13:44:04 -0400 Subject: [PATCH 08/13] Remove travis badge --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 1233bf5e..f91db401 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ # neural-fortran -[![Build Status](https://travis-ci.org/modern-fortran/neural-fortran.svg?branch=master)](https://travis-ci.org/modern-fortran/neural-fortran) [![GitHub issues](https://img.shields.io/github/issues/modern-fortran/neural-fortran.svg)](https://github.com/modern-fortran/neural-fortran/issues) A parallel neural net microframework. 
From c872ca89a30ef6537514f62921c6f8fa8862ab18 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Fri, 29 Apr 2022 14:18:00 -0400 Subject: [PATCH 09/13] Update README --- README.md | 246 +++--------------------------------------------------- 1 file changed, 11 insertions(+), 235 deletions(-) diff --git a/README.md b/README.md index f91db401..a1c11c71 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). - [Building with fpm](https://github.com/modern-fortran/neural-fortran#building-with-fpm) - [Building with CMake](https://github.com/modern-fortran/neural-fortran#building-with-cmake) * [Examples](https://github.com/modern-fortran/neural-fortran#examples) - - [Creating a network](https://github.com/modern-fortran/neural-fortran#creating-a-network) - - [Training the network](https://github.com/modern-fortran/neural-fortran#training-the-network) - - [Saving and loading from file](https://github.com/modern-fortran/neural-fortran#saving-and-loading-from-file) - - [MNIST training example](https://github.com/modern-fortran/neural-fortran#mnist-training-example) * [API documentation](https://github.com/modern-fortran/neural-fortran#api-documentation) -* [Contributing](https://github.com/modern-fortran/neural-fortran#contributing) * [Contributors](https://github.com/modern-fortran/neural-fortran#contributors) * [Related projects](https://github.com/modern-fortran/neural-fortran#related-projects) @@ -25,7 +20,6 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). 
* Backprop with Mean Square Error cost function * Data-based parallelism * Several activation functions -* Support for 32, 64, and 128-bit floating point numbers ## Getting started @@ -44,7 +38,7 @@ Dependencies: Compilers tested include: -* gfortran-10.3.0 +* gfortran-9.4.0 * ifort-2021.4 * ifx-2021.4 @@ -53,7 +47,7 @@ Compilers tested include: #### Building in serial mode ``` -fpm build --flag "-cpp -O3 -ffast-math -fcoarray=single" +fpm build ``` #### Building in parallel mode @@ -64,13 +58,13 @@ Once installed, use the compiler wrappers `caf` and `cafrun` to build and execut in parallel, respectively: ``` -fpm build --compiler caf --flag "-cpp -O3 -ffast-math" +fpm build --compiler caf ``` #### Testing with fpm ``` -fpm test --flag "-cpp -O3 -ffast-math -fcoarray=single" +fpm test ``` For the time being, you need to specify the same compiler flags to `fpm test` @@ -103,7 +97,7 @@ in parallel, respectively: ``` FC=caf cmake .. make -cafrun -n 4 bin/example_mnist # run MNIST example on 4 cores +cafrun -n 4 bin/mnist # run MNIST example on 4 cores ``` #### Building with a different compiler @@ -128,22 +122,6 @@ where the value of `-DBLAS` should point to the desired BLAS implementation, which has to be available in the linking path. This option is currently available only with gfortran. -#### Building in double or quad precision - -By default, neural-fortran is built in single precision mode -(32-bit floating point numbers). Alternatively, you can configure to build -in 64 or 128-bit floating point mode: - -``` -cmake .. -DREAL=64 -``` - -or - -``` -cmake .. -DREAL=128 -``` - #### Building in debug mode To build with debugging flags enabled, type: @@ -164,202 +142,12 @@ to run the tests. 
## Examples -### Creating a network - -Creating a network with 3 layers, -one input, one hidden, and one output layer, -with 3, 5, and 2 neurons each: - -```fortran -use mod_network, only: network_type -type(network_type) :: net -net = network_type([3, 5, 2]) -``` - -### Setting the activation function - -By default, the network will be initialized with the sigmoid activation -function for all layers. You can specify a different activation function: - -```fortran -net = network_type([3, 5, 2], activation='tanh') -``` - -or set it after the fact: - -```fortran -net = network_type([3, 5, 2]) -call net % set_activation('tanh') -``` - -It's possible to set different activation functions for each layer. -For example, this snippet will create a network with a Gaussian -activation functions for all layers except the output layer, -and a RELU function for the output layer: - -```fortran -net = network_type([3, 5, 2], activation='gaussian') -call net % layers(3) % set_activation('relu') -``` - -Available activation function options are: `gaussian`, `relu`, `sigmoid`, -`step`, and `tanh`. -See [mod_activation.f90](https://github.com/modern-fortran/neural-fortran/blob/master/src/lib/mod_activation.f90) -for specifics. - -### Training the network - -To train the network, pass the training input and output data sample, -and a learning rate, to `net % train()`: - -```fortran -program example_simple - use mod_network, only: network_type - implicit none - type(network_type) :: net - real, allocatable :: input(:), output(:) - integer :: i - net = network_type([3, 5, 2]) - input = [0.2, 0.4, 0.6] - output = [0.123456, 0.246802] - do i = 1, 500 - call net % train(input, output, eta=1.0) - print *, 'Iteration: ', i, 'Output:', net % output(input) - end do -end program example_simple -``` - -The size of `input` and `output` arrays must match the sizes of the -input and output layers, respectively. The learning rate `eta` determines -how quickly are weights and biases updated. 
- -The output is: - -``` - Iteration: 1 Output: 0.470592350 0.764851630 - Iteration: 2 Output: 0.409876496 0.713752568 - Iteration: 3 Output: 0.362703383 0.654729187 - ... - Iteration: 500 Output: 0.123456128 0.246801868 -``` - -The initial values will vary between runs because we initialize weights -and biases randomly. - -### Saving and loading from file - -To save a network to a file, do: - -```fortran -call net % save('my_net.txt') -``` - -Loading from file works the same way: - -```fortran -call net % load('my_net.txt') -``` - -### Synchronizing networks in parallel mode +The easiest way to get a sense of how to use neural-fortran is to look at +examples, in increasing level of complexity: -When running in parallel mode, you may need to synchronize the weights -and biases between images. You can do it like this: - -```fortran -call net % sync(1) -``` - -The argument to `net % sync()` refers to the source image from which to -broadcast. It can be any positive number not greater than `num_images()`. - -### MNIST training example - -Here's the complete program: - -```fortran -program example_mnist - - ! A training example with the MNIST dataset. - ! Uses stochastic gradient descent and mini-batch size of 100. - ! Can be run in serial or parallel mode without modifications. 
- - use mod_kinds, only: ik, rk - use mod_mnist, only: label_digits, load_mnist - use mod_network, only: network_type - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - real(rk), allocatable :: input(:,:), output(:,:) - - type(network_type) :: net - - integer(ik) :: i, n, num_epochs - integer(ik) :: batch_size, batch_start, batch_end - real(rk) :: pos - - call load_mnist(tr_images, tr_labels, te_images, te_labels) - - net = network_type([784, 30, 10]) - - batch_size = 100 - num_epochs = 10 - - if (this_image() == 1) print '(a,f5.2,a)', 'Initial accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - epochs: do n = 1, num_epochs - batches: do i = 1, size(tr_labels) / batch_size - - ! pull a random mini-batch from the dataset - call random_number(pos) - batch_start = int(pos * (size(tr_labels) - batch_size + 1)) - batch_end = batch_start + batch_size - 1 - - ! prepare mini-batch - input = tr_images(:,batch_start:batch_end) - output = label_digits(tr_labels(batch_start:batch_end)) - - ! train the network on the mini-batch - call net % train(input, output, eta=3._rk) - - end do batches - - if (this_image() == 1) print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - end do epochs - -end program example_mnist -``` - -The MNIST data will be automatically downloaded at the first attempt at loading it -with the `load_mnist` subroutine. 
- -Running the program will report the accuracy after each epoch: - -``` -$ ./example_mnist -Initial accuracy: 10.32 % -Epoch 1 done, Accuracy: 91.06 % -Epoch 2 done, Accuracy: 92.35 % -Epoch 3 done, Accuracy: 93.32 % -Epoch 4 done, Accuracy: 93.62 % -Epoch 5 done, Accuracy: 93.97 % -Epoch 6 done, Accuracy: 94.16 % -Epoch 7 done, Accuracy: 94.42 % -Epoch 8 done, Accuracy: 94.55 % -Epoch 9 done, Accuracy: 94.67 % -Epoch 10 done, Accuracy: 94.81 % -``` - -You can also run this example without any modifications in parallel, -for example on 16 cores using [OpenCoarrays](https://github.com/sourceryinstitute/OpenCoarrays): - -``` -$ cafrun -n 16 ./example_mnist -``` +1. [simple](example/simple.f90): Approximating a simple, constant data relationship +2. [sine](example/sine.f90): Approximating a sine function +3. [mnist](example/mnist.f90): Hand-written digit recognition using the MNIST dataset ## API documentation @@ -373,23 +161,11 @@ ford ford.md from the neural-fortran top-level directory to generate the API documentation in doc/html. Point your browser to doc/html/index.html to read it. -## Contributing - -neural-fortran is currently a proof-of-concept with potential for -use in production. Contributions are welcome, especially for: - -* Expanding the network class to other network infrastructures -* Adding other cost functions such as cross-entropy. -* Model-based (`matmul`) parallelism -* Adding more examples -* Others? - -You can start at the list of open [issues](https://github.com/modern-fortran/neural-fortran/issues). 
- ## Contributors Thanks to all open-source contributors to neural-fortran: +* [@awvwgk](https://github.com/awvwgk) * [@ivan-pi](https://github.com/ivan-pi) * [@jvdp1](https://github.com/jvdp1) * [@milancurcic](https://github.com/milancurcic) From 3702c20a32d2c714e9d6979bb2d2772b1095d3ae Mon Sep 17 00:00:00 2001 From: milancurcic Date: Sun, 1 May 2022 16:29:12 -0400 Subject: [PATCH 10/13] Fix test name in report --- test/test_input1d_layer.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_input1d_layer.f90 b/test/test_input1d_layer.f90 index 4404348d..f6a50369 100644 --- a/test/test_input1d_layer.f90 +++ b/test/test_input1d_layer.f90 @@ -48,9 +48,9 @@ program test_input1d_layer end if if (ok) then - print '(a)', 'test_dense_layer: All tests passed.' + print '(a)', 'test_input1d_layer: All tests passed.' else - write(stderr, '(a)') 'test_dense_layer: One or more tests failed.' + write(stderr, '(a)') 'test_input1d_layer: One or more tests failed.' stop 1 end if From 3d2b14814ff1697e1b4b3db6d31f8dc12095aa9c Mon Sep 17 00:00:00 2001 From: milancurcic Date: Mon, 2 May 2022 21:47:10 -0400 Subject: [PATCH 11/13] Fix indentation --- src/nf_dense_layer_submodule.f90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nf_dense_layer_submodule.f90 b/src/nf_dense_layer_submodule.f90 index ef6aefcf..543b86cd 100644 --- a/src/nf_dense_layer_submodule.f90 +++ b/src/nf_dense_layer_submodule.f90 @@ -24,11 +24,11 @@ pure module subroutine backward(self, input, gradient) real :: db(self % output_size) real :: dw(self % input_size, self % output_size) - db = gradient * self % activation_prime(self % z) - dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)])) - self % gradient = matmul(self % weights, db) - self % dw = self % dw + dw - self % db = self % db + db + db = gradient * self % activation_prime(self % z) + dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)])) + self % gradient 
= matmul(self % weights, db) + self % dw = self % dw + dw + self % db = self % db + db end subroutine backward From 6ccb92ac210fc2e6241de39c1b4bbf5f3c6e9274 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Tue, 3 May 2022 11:55:57 -0400 Subject: [PATCH 12/13] Update build instructions for -fno-frontend-optimize --- CMakeLists.txt | 4 ++-- README.md | 27 +++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c9b3d150..42847f3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,8 +33,8 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES GNU) message(STATUS "Configuring build to use BLAS from ${BLAS}") endif() - set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -C -fbacktrace") - set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") + set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -fcheck=bounds -fbacktrace") + set(CMAKE_Fortran_FLAGS_RELEASE "-Ofast -fno-frontend-optimize") endif() # compiler flags for ifort diff --git a/README.md b/README.md index a1c11c71..b12e3708 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,23 @@ Compilers tested include: fpm build ``` +By default, without specifying the build profile, fpm will build neural-fortran +using the debug compiler flags, and without optimization. +To build optimized code, build with the release profile: + +``` +fpm build --profile release +``` + +If you're using GFortran, make sure to also pass an additional flag: + +``` +fpm build --profile release --flag "-fno-frontend-optimize" +``` + +The `-fno-frontend-optimize` disables some optimizations that may be harmful +when building neural-fortran. 
+ #### Building in parallel mode If you use GFortran and want to run neural-fortran in parallel, @@ -58,7 +75,7 @@ Once installed, use the compiler wrappers `caf` and `cafrun` to build and execut in parallel, respectively: ``` -fpm build --compiler caf +fpm build --compiler caf --profile release --flag "-fno-frontend-optimize" ``` #### Testing with fpm @@ -68,7 +85,7 @@ fpm test ``` For the time being, you need to specify the same compiler flags to `fpm test` -as you did in `fpm build` so that fpm can figure out to use the same build +as you did in `fpm build` so that fpm knows it should use the same build profile. See [Fortran Package Manager](https://github.com/fortran-lang/fpm) for more info on fpm. @@ -149,6 +166,12 @@ examples, in increasing level of complexity: 2. [sine](example/sine.f90): Approximating a sine function 3. [mnist](example/mnist.f90): Hand-written digit recognition using the MNIST dataset +The MNIST example uses [curl](https://curl.se/) to download the dataset, +so make sure you have it installed on your system. +Most Linux OSs have it out of the box. +The dataset will be downloaded only the first time you run the example in any +given directory. + ## API documentation API documentation can be generated with [FORD](https://github.com/Fortran-FOSS-Programmers/ford/). 
From 11c3e5ab2e20ae9806dc506895d410482783ec78 Mon Sep 17 00:00:00 2001 From: Milan Curcic Date: Tue, 3 May 2022 12:31:35 -0400 Subject: [PATCH 13/13] Add a few tests for a dense network --- CMakeLists.txt | 2 +- test/test_dense_network.f90 | 69 +++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 test/test_dense_network.f90 diff --git a/CMakeLists.txt b/CMakeLists.txt index 42847f3e..75d29ff6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,7 @@ string(REGEX REPLACE "^ | $" "" LIBS "${LIBS}") # tests enable_testing() -foreach(execid dense_layer input1d_layer) +foreach(execid input1d_layer dense_layer dense_network) add_executable(test_${execid} test/test_${execid}.f90) target_link_libraries(test_${execid} neural ${LIBS}) add_test(test_${execid} bin/test_${execid}) diff --git a/test/test_dense_network.f90 b/test/test_dense_network.f90 new file mode 100644 index 00000000..9df7e71b --- /dev/null +++ b/test/test_dense_network.f90 @@ -0,0 +1,69 @@ +program test_dense_network + use iso_fortran_env, only: stderr => error_unit + use nf, only: dense, input, network + implicit none + type(network) :: net + logical :: ok = .true. + + ! Minimal 2-layer network + net = network([ & + input(1), & + dense(1) & + ]) + + if (.not. size(net % layers) == 2) then + write(stderr, '(a)') 'dense network should have 2 layers.. failed' + ok = .false. + end if + + if (.not. all(net % output([0.]) == 0.5)) then + write(stderr, '(a)') & + 'dense network should output exactly 0.5 for input 0.. failed' + ok = .false. + end if + + training: block + real :: x(1), y(1) + real :: tolerance = 1e-3 + integer :: n + integer, parameter :: num_iterations = 1000 + + x = [0.123] + y = [0.765] + + do n = 1, num_iterations + call net % forward(x) + call net % backward(y) + call net % update(1.) + if (all(abs(net % output(x) - y) < tolerance)) exit + end do + + if (.not. 
n <= num_iterations) then + write(stderr, '(a)') & + 'dense network should converge in simple training.. failed' + ok = .false. + end if + + end block training + + ! A bit larger multi-layer network + net = network([ & + input(784), & + dense(30), & + dense(20), & + dense(10) & + ]) + + if (.not. size(net % layers) == 4) then + write(stderr, '(a)') 'dense network should have 4 layers.. failed' + ok = .false. + end if + + if (ok) then + print '(a)', 'test_dense_network: All tests passed.' + else + write(stderr, '(a)') 'test_dense_network: One or more tests failed.' + stop 1 + end if + +end program test_dense_network