diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 963a464c..00000000 --- a/.travis.yml +++ /dev/null @@ -1,28 +0,0 @@ -language: c -group: travis_latest -dist: xenial - -git: - depth: 3 - quiet: true - -addons: - apt: - sources: ubuntu-toolchain-r-test - packages: gfortran-8 - -matrix: - include: - - os: linux - env: FC=gfortran-8 - -before_install: -- cd data/mnist && tar xzvf mnist.tar.gz && cd - - -install: -- mkdir build -- cd build -- cmake .. -DSERIAL=1 -- make - -script: ctest --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index 412d245f..75d29ff6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,24 +13,6 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "release") endif() -# handle integer size -if(INT) - message(STATUS "Configuring build for ${INT}-bit integers") - add_definitions(-DINT${INT}) -else() - message(STATUS "Configuring build for 32-bit integers") - add_definitions(-DINT32) -endif() - -# handle real size -if(REAL) - message(STATUS "Configuring build for ${REAL}-bit reals") - add_definitions(-DREAL${REAL}) -else() - message(STATUS "Configuring build for 32-bit reals") - add_definitions(-DREAL32) -endif() - if(SERIAL) message(STATUS "Configuring build for serial execution") else() @@ -51,9 +33,8 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES GNU) message(STATUS "Configuring build to use BLAS from ${BLAS}") endif() - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp") - set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -C -fbacktrace") - set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") + set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -fcheck=bounds -fbacktrace") + set(CMAKE_Fortran_FLAGS_RELEASE "-Ofast -fno-frontend-optimize") endif() # compiler flags for ifort @@ -64,7 +45,7 @@ if(CMAKE_Fortran_COMPILER_ID MATCHES Intel) set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -coarray=single") endif() - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fpp -assume byterecl,realloc_lhs -heap-arrays") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -assume byterecl") set(CMAKE_Fortran_FLAGS_DEBUG "-O0 -g -C -traceback") set(CMAKE_Fortran_FLAGS_RELEASE "-O3") @@ -83,21 +64,34 @@ endif() # library to archive (libneural.a) add_library(neural - src/mod_activation.f90 - src/mod_activation_submodule.f90 - src/mod_io.f90 - src/mod_io_submodule.f90 - src/mod_kinds.f90 - src/mod_layer.f90 - src/mod_layer_submodule.f90 - src/mod_mnist.f90 - src/mod_mnist_submodule.f90 - src/mod_network.f90 - src/mod_network_submodule.f90 - src/mod_parallel.f90 - src/mod_parallel_submodule.f90 - src/mod_random.f90 - src/mod_random_submodule.f90 + src/nf_activation.f90 + src/nf_base_layer.f90 + src/nf_base_layer_submodule.f90 + src/nf_conv2d_layer.f90 + src/nf_datasets_mnist.f90 + src/nf_datasets_mnist_submodule.f90 + src/nf_dense_layer.f90 + src/nf_dense_layer_submodule.f90 + src/nf.f90 + src/nf_input1d_layer.f90 + src/nf_input1d_layer_submodule.f90 + src/nf_input3d_layer.f90 + src/nf_input3d_layer_submodule.f90 + src/nf_io.f90 + src/nf_io_submodule.f90 + src/nf_layer_constructors.f90 + src/nf_layer_constructors_submodule.f90 + src/nf_layer.f90 + src/nf_layer_submodule.f90 + src/nf_loss.f90 + src/nf_loss_submodule.f90 + src/nf_network.f90 + src/nf_network_submodule.f90 + src/nf_optimizers.f90 + src/nf_parallel.f90 + src/nf_parallel_submodule.f90 + src/nf_random.f90 + src/nf_random_submodule.f90 ) # Remove leading or trailing whitespace @@ -105,14 +99,14 @@ string(REGEX REPLACE "^ | $" "" LIBS "${LIBS}") # tests enable_testing() -foreach(execid mnist network_save network_sync 
set_activation_function) +foreach(execid input1d_layer dense_layer dense_network) add_executable(test_${execid} test/test_${execid}.f90) target_link_libraries(test_${execid} neural ${LIBS}) add_test(test_${execid} bin/test_${execid}) endforeach() -foreach(execid mnist mnist_epochs save_and_load simple sine) - add_executable(example_${execid} example/example_${execid}.f90) - target_link_libraries(example_${execid} neural ${LIBS}) - add_test(example_${execid} bin/example_${execid}) +foreach(execid mnist simple sine) + add_executable(${execid} example/${execid}.f90) + target_link_libraries(${execid} neural ${LIBS}) + #add_test(example_${execid} bin/example_${execid}) endforeach() diff --git a/README.md b/README.md index 27d943b0..b12e3708 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ # neural-fortran -[![Build Status](https://travis-ci.org/modern-fortran/neural-fortran.svg?branch=master)](https://travis-ci.org/modern-fortran/neural-fortran) [![GitHub issues](https://img.shields.io/github/issues/modern-fortran/neural-fortran.svg)](https://github.com/modern-fortran/neural-fortran/issues) A parallel neural net microframework. @@ -11,12 +10,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). - [Building with fpm](https://github.com/modern-fortran/neural-fortran#building-with-fpm) - [Building with CMake](https://github.com/modern-fortran/neural-fortran#building-with-cmake) * [Examples](https://github.com/modern-fortran/neural-fortran#examples) - - [Creating a network](https://github.com/modern-fortran/neural-fortran#creating-a-network) - - [Training the network](https://github.com/modern-fortran/neural-fortran#training-the-network) - - [Saving and loading from file](https://github.com/modern-fortran/neural-fortran#saving-and-loading-from-file) - - [MNIST training example](https://github.com/modern-fortran/neural-fortran#mnist-training-example) * [API documentation](https://github.com/modern-fortran/neural-fortran#api-documentation) -* [Contributing](https://github.com/modern-fortran/neural-fortran#contributing) * [Contributors](https://github.com/modern-fortran/neural-fortran#contributors) * [Related projects](https://github.com/modern-fortran/neural-fortran#related-projects) @@ -26,7 +20,6 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). * Backprop with Mean Square Error cost function * Data-based parallelism * Several activation functions -* Support for 32, 64, and 128-bit floating point numbers ## Getting started @@ -54,9 +47,26 @@ Compilers tested include: #### Building in serial mode ``` -fpm build --flag "-cpp -O3 -ffast-math -fcoarray=single" +fpm build ``` +By default, without specifying the build profile, fpm will build neural-fortran +using the debug compiler flags, and without optimization. +To build optimized code, build with the release profile: + +``` +fpm build --profile release +``` + +If you're using GFortran, make sure to also pass an additional flag: + +``` +fpm build --profile release --flag "-fno-frontend-optimize" +``` + +The `-fno-frontend-optimize` disables some optimizations that may be harmful +when building neural-fortran. 
+ #### Building in parallel mode If you use GFortran and want to run neural-fortran in parallel, @@ -65,17 +75,17 @@ Once installed, use the compiler wrappers `caf` and `cafrun` to build and execut in parallel, respectively: ``` -fpm build --compiler caf --flag "-cpp -O3 -ffast-math" +fpm build --compiler caf --profile release --flag "-fno-frontend-optimize" ``` #### Testing with fpm ``` -fpm test --flag "-cpp -O3 -ffast-math -fcoarray=single" +fpm test ``` For the time being, you need to specify the same compiler flags to `fpm test` -as you did in `fpm build` so that fpm can figure out to use the same build +as you did in `fpm build` so that fpm knows it should use the same build profile. See [Fortran Package Manager](https://github.com/fortran-lang/fpm) for more info on fpm. @@ -104,7 +114,7 @@ in parallel, respectively: ``` FC=caf cmake .. make -cafrun -n 4 bin/example_mnist # run MNIST example on 4 cores +cafrun -n 4 bin/mnist # run MNIST example on 4 cores ``` #### Building with a different compiler @@ -129,22 +139,6 @@ where the value of `-DBLAS` should point to the desired BLAS implementation, which has to be available in the linking path. This option is currently available only with gfortran. -#### Building in double or quad precision - -By default, neural-fortran is built in single precision mode -(32-bit floating point numbers). Alternatively, you can configure to build -in 64 or 128-bit floating point mode: - -``` -cmake .. -DREAL=64 -``` - -or - -``` -cmake .. -DREAL=128 -``` - #### Building in debug mode To build with debugging flags enabled, type: @@ -165,202 +159,18 @@ to run the tests. ## Examples -### Creating a network - -Creating a network with 3 layers, -one input, one hidden, and one output layer, -with 3, 5, and 2 neurons each: - -```fortran -use mod_network, only: network_type -type(network_type) :: net -net = network_type([3, 5, 2]) -``` - -### Setting the activation function - -By default, the network will be initialized with the sigmoid activation -function for all layers. You can specify a different activation function: - -```fortran -net = network_type([3, 5, 2], activation='tanh') -``` - -or set it after the fact: - -```fortran -net = network_type([3, 5, 2]) -call net % set_activation('tanh') -``` - -It's possible to set different activation functions for each layer. -For example, this snippet will create a network with a Gaussian -activation functions for all layers except the output layer, -and a RELU function for the output layer: - -```fortran -net = network_type([3, 5, 2], activation='gaussian') -call net % layers(3) % set_activation('relu') -``` - -Available activation function options are: `gaussian`, `relu`, `sigmoid`, -`step`, and `tanh`. -See [mod_activation.f90](https://github.com/modern-fortran/neural-fortran/blob/master/src/lib/mod_activation.f90) -for specifics. - -### Training the network - -To train the network, pass the training input and output data sample, -and a learning rate, to `net % train()`: - -```fortran -program example_simple - use mod_network, only: network_type - implicit none - type(network_type) :: net - real, allocatable :: input(:), output(:) - integer :: i - net = network_type([3, 5, 2]) - input = [0.2, 0.4, 0.6] - output = [0.123456, 0.246802] - do i = 1, 500 - call net % train(input, output, eta=1.0) - print *, 'Iteration: ', i, 'Output:', net % output(input) - end do -end program example_simple -``` - -The size of `input` and `output` arrays must match the sizes of the -input and output layers, respectively. 
The learning rate `eta` determines -how quickly are weights and biases updated. - -The output is: - -``` - Iteration: 1 Output: 0.470592350 0.764851630 - Iteration: 2 Output: 0.409876496 0.713752568 - Iteration: 3 Output: 0.362703383 0.654729187 - ... - Iteration: 500 Output: 0.123456128 0.246801868 -``` - -The initial values will vary between runs because we initialize weights -and biases randomly. - -### Saving and loading from file - -To save a network to a file, do: - -```fortran -call net % save('my_net.txt') -``` - -Loading from file works the same way: - -```fortran -call net % load('my_net.txt') -``` - -### Synchronizing networks in parallel mode - -When running in parallel mode, you may need to synchronize the weights -and biases between images. You can do it like this: - -```fortran -call net % sync(1) -``` - -The argument to `net % sync()` refers to the source image from which to -broadcast. It can be any positive number not greater than `num_images()`. - -### MNIST training example - -Here's the complete program: - -```fortran -program example_mnist - - ! A training example with the MNIST dataset. - ! Uses stochastic gradient descent and mini-batch size of 100. - ! Can be run in serial or parallel mode without modifications. - - use mod_kinds, only: ik, rk - use mod_mnist, only: label_digits, load_mnist - use mod_network, only: network_type - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - real(rk), allocatable :: input(:,:), output(:,:) - - type(network_type) :: net - - integer(ik) :: i, n, num_epochs - integer(ik) :: batch_size, batch_start, batch_end - real(rk) :: pos - - call load_mnist(tr_images, tr_labels, te_images, te_labels) - - net = network_type([784, 30, 10]) - - batch_size = 100 - num_epochs = 10 +The easiest way to get a sense of how to use neural-fortran is to look at +examples, in increasing level of complexity: - if (this_image() == 1) print '(a,f5.2,a)', 'Initial accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' +1. [simple](example/simple.f90): Approximating a simple, constant data relationship +2. [sine](example/sine.f90): Approximating a sine function +3. [mnist](example/mnist.f90): Hand-written digit recognition using the MNIST dataset - epochs: do n = 1, num_epochs - batches: do i = 1, size(tr_labels) / batch_size - - ! pull a random mini-batch from the dataset - call random_number(pos) - batch_start = int(pos * (size(tr_labels) - batch_size + 1)) - batch_end = batch_start + batch_size - 1 - - ! prepare mini-batch - input = tr_images(:,batch_start:batch_end) - output = label_digits(tr_labels(batch_start:batch_end)) - - ! train the network on the mini-batch - call net % train(input, output, eta=3._rk) - - end do batches - - if (this_image() == 1) print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - end do epochs - -end program example_mnist -``` - -The MNIST data will be automatically downloaded at the first attempt at loading it -with the `load_mnist` subroutine. 
- -Running the program will report the accuracy after each epoch: - -``` -$ ./example_mnist -Initial accuracy: 10.32 % -Epoch 1 done, Accuracy: 91.06 % -Epoch 2 done, Accuracy: 92.35 % -Epoch 3 done, Accuracy: 93.32 % -Epoch 4 done, Accuracy: 93.62 % -Epoch 5 done, Accuracy: 93.97 % -Epoch 6 done, Accuracy: 94.16 % -Epoch 7 done, Accuracy: 94.42 % -Epoch 8 done, Accuracy: 94.55 % -Epoch 9 done, Accuracy: 94.67 % -Epoch 10 done, Accuracy: 94.81 % -``` - -You can also run this example without any modifications in parallel, -for example on 16 cores using [OpenCoarrays](https://github.com/sourceryinstitute/OpenCoarrays): - -``` -$ cafrun -n 16 ./example_mnist -``` +The MNIST example uses [curl](https://curl.se/) to download the dataset, +so make sure you have it installed on your system. +Most Linux OSs have it out of the box. +The dataset will be downloaded only the first time you run the example in any +given directory. ## API documentation @@ -374,23 +184,11 @@ ford ford.md from the neural-fortran top-level directory to generate the API documentation in doc/html. Point your browser to doc/html/index.html to read it. -## Contributing - -neural-fortran is currently a proof-of-concept with potential for -use in production. Contributions are welcome, especially for: - -* Expanding the network class to other network infrastructures -* Adding other cost functions such as cross-entropy. -* Model-based (`matmul`) parallelism -* Adding more examples -* Others? - -You can start at the list of open [issues](https://github.com/modern-fortran/neural-fortran/issues). - ## Contributors Thanks to all open-source contributors to neural-fortran: +* [@awvwgk](https://github.com/awvwgk) * [@ivan-pi](https://github.com/ivan-pi) * [@jvdp1](https://github.com/jvdp1) * [@milancurcic](https://github.com/milancurcic) diff --git a/example/example_mnist.f90 b/example/example_mnist.f90 deleted file mode 100644 index 1192e07e..00000000 --- a/example/example_mnist.f90 +++ /dev/null @@ -1,55 +0,0 @@ -program example_mnist - - ! A training example with the MNIST dataset. - ! Uses stochastic gradient descent and mini-batch size of 100. - ! Can be run in serial or parallel mode without modifications. - - use mod_kinds, only: ik, rk - use mod_mnist, only: label_digits, load_mnist - use mod_network, only: network_type - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - real(rk), allocatable :: input(:,:), output(:,:) - - type(network_type) :: net - - integer(ik) :: i, n, num_epochs - integer(ik) :: batch_size, batch_start, batch_end - real(rk) :: pos - - call load_mnist(tr_images, tr_labels, te_images, te_labels) - - net = network_type([784, 30, 10]) - - batch_size = 100 - num_epochs = 10 - - if (this_image() == 1) print '(a,f5.2,a)', 'Initial accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - epochs: do n = 1, num_epochs - batches: do i = 1, size(tr_labels) / batch_size - - ! pull a random mini-batch from the dataset - call random_number(pos) - batch_start = int(pos * (size(tr_labels) - batch_size + 1)) - batch_end = batch_start + batch_size - 1 - - ! prepare mini-batch - input = tr_images(:,batch_start:batch_end) - output = label_digits(tr_labels(batch_start:batch_end)) - - ! 
train the network on the mini-batch - call net % train(input, output, eta=3._rk) - - end do batches - - if (this_image() == 1) print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - end do epochs - -end program example_mnist diff --git a/example/example_mnist_epochs.f90 b/example/example_mnist_epochs.f90 deleted file mode 100644 index 08ba04a8..00000000 --- a/example/example_mnist_epochs.f90 +++ /dev/null @@ -1,36 +0,0 @@ -program example_mnist - - ! A training example with the MNIST dataset. - ! Uses stochastic gradient descent and mini-batch size of 100. - ! Can be run in serial or parallel mode without modifications. - - use mod_kinds, only: ik, rk - use mod_mnist, only: label_digits, load_mnist - use mod_network, only: network_type - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - - type(network_type) :: net - - integer(ik) :: i, n, num_epochs - integer(ik) :: batch_size - - call load_mnist(tr_images, tr_labels, te_images, te_labels) - - net = network_type([size(tr_images, dim=1), 10, size(label_digits(tr_labels), dim=1)]) - - batch_size = 100 - num_epochs = 10 - - if (this_image() == 1) print '(a,f5.2,a)', 'Initial accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - - call net % train(tr_images, label_digits(tr_labels), 3._rk, num_epochs, batch_size) - - if (this_image() == 1) print '(a,f5.2,a)', 'Epochs done, Accuracy: ', & - net % accuracy(te_images, label_digits(te_labels)) * 100, ' %' - -end program example_mnist diff --git a/example/example_save_and_load.f90 b/example/example_save_and_load.f90 deleted file mode 100644 index f9ef7f1c..00000000 --- a/example/example_save_and_load.f90 +++ /dev/null @@ -1,32 +0,0 @@ -program example_save_and_load - - use mod_network, only: network_type - implicit none - - type(network_type) :: net1, net2 - real, allocatable :: input(:), output(:) - integer :: i - - net1 = network_type([3, 5, 2]) - - input = [0.2, 0.4, 0.6] - output = [0.123456, 0.246802] - - ! train network 1 - do i = 1, 500 - call net1 % train(input, output, eta=1.0) - end do - - ! save network 1 to file - call net1 % save('my_simple_net.txt') - - ! 
load network 2 from file - !net2 = network_type([3, 5, 2]) - call net2 % load('my_simple_net.txt') - call net2 % set_activation('sigmoid') - - print *, 'Network 1 output: ', net1 % output(input) - print *, 'Network 2 output: ', net2 % output(input) - print *, 'Outputs match: ', all(net1 % output(input) == net2 % output(input)) - -end program example_save_and_load diff --git a/example/example_simple.f90 b/example/example_simple.f90 deleted file mode 100644 index 6fe3ba01..00000000 --- a/example/example_simple.f90 +++ /dev/null @@ -1,14 +0,0 @@ -program example_simple - use mod_network, only: network_type - implicit none - type(network_type) :: net - real, allocatable :: input(:), output(:) - integer :: i - net = network_type([3, 5, 2]) - input = [0.2, 0.4, 0.6] - output = [0.123456, 0.246802] - do i = 1, 500 - call net % train(input, output, eta=1.0) - print *, 'Iteration: ', i, 'Output:', net % output(input) - end do -end program example_simple diff --git a/example/example_sine.f90 b/example/example_sine.f90 deleted file mode 100644 index 1b5931c9..00000000 --- a/example/example_sine.f90 +++ /dev/null @@ -1,18 +0,0 @@ -program example_sine - use mod_kinds, only: ik, rk - use mod_network, only: network_type - implicit none - type(network_type) :: net - real(rk) :: cumloss, x, y - real(rk), parameter :: pi = 4 * atan(1._rk) - integer(ik) :: i - net = network_type([1, 5, 1]) - cumloss = 0 - do i = 1, 1000000 - call random_number(x) - y = (sin(x * 2 * pi) + 1) * 0.5 - call net % train([x], [y], eta=10._rk) - cumloss = cumloss + net % loss([x], [y]) - print *, i, cumloss / i - end do -end program example_sine diff --git a/example/mnist.f90 b/example/mnist.f90 new file mode 100644 index 00000000..c711f993 --- /dev/null +++ b/example/mnist.f90 @@ -0,0 +1,63 @@ +program mnist + use nf, only: dense, input, network + use nf_datasets_mnist, only: label_digits, load_mnist + use nf_optimizers, only: sgd + + implicit none + + type(network) :: net + real, allocatable :: training_images(:,:), training_labels(:) + real, allocatable :: validation_images(:,:), validation_labels(:) + integer :: n, num_epochs + + call load_mnist(training_images, training_labels, & + validation_images, validation_labels) + + print '("MNIST")' + print '(60("="))' + + net = network([ & + input(784), & + dense(30), & + dense(10) & + ]) + num_epochs = 10 + + call net % print_info() + + if (this_image() == 1) & + print '(a,f5.2,a)', 'Initial accuracy: ', accuracy( & + net, validation_images, label_digits(validation_labels)) * 100, ' %' + + epochs: do n = 1, num_epochs + + call net % train( & + training_images, & + label_digits(training_labels), & + batch_size=100, & + epochs=1, & + optimizer=sgd(learning_rate=3.) 
& + ) + + if (this_image() == 1) & + print '(a,i2,a,f5.2,a)', 'Epoch ', n, ' done, Accuracy: ', accuracy( & + net, validation_images, label_digits(validation_labels)) * 100, ' %' + + end do epochs + +contains + + real function accuracy(net, x, y) + type(network), intent(in out) :: net + real, intent(in) :: x(:,:), y(:,:) + integer :: i, good + good = 0 + do i = 1, size(x, dim=2) + if (all(maxloc(net % output(x(:,i))) == maxloc(y(:,i)))) then + good = good + 1 + end if + end do + accuracy = real(good) / size(x, dim=2) + end function accuracy + +end program mnist diff --git a/example/simple.f90 b/example/simple.f90 new file mode 100644 index 00000000..44c87051 --- /dev/null +++ b/example/simple.f90 @@ -0,0 +1,34 @@ +program simple + use nf, only: dense, input, network + implicit none + type(network) :: net + real, allocatable :: x(:), y(:) + integer, parameter :: num_iterations = 500 + integer :: n + + print '("Simple")' + print '(60("="))' + + net = network([ & + input(3), & + dense(5), & + dense(2) & + ]) + + call net % print_info() + + x = [0.2, 0.4, 0.6] + y = [0.123456, 0.246802] + + do n = 0, num_iterations + + call net % forward(x) + call net % backward(y) + call net % update(1.) + + if (mod(n, 50) == 0) & + print '(i4,2(3x,f8.6))', n, net % output(x) + + end do + +end program simple diff --git a/example/sine.f90 b/example/sine.f90 new file mode 100644 index 00000000..fd7483d9 --- /dev/null +++ b/example/sine.f90 @@ -0,0 +1,43 @@ +program sine + use nf, only: dense, input, network + implicit none + type(network) :: net + real :: x(1), y(1) + real, parameter :: pi = 4 * atan(1.) + integer, parameter :: num_iterations = 100000 + integer, parameter :: test_size = 30 + real :: xtest(test_size), ytest(test_size), ypred(test_size) + integer :: i, n + + print '("Sine training")' + print '(60("="))' + + net = network([ & + input(1), & + dense(5), & + dense(1) & + ]) + + call net % print_info() + + xtest = [((i - 1) * 2 * pi / test_size, i = 1, test_size)] + ytest = (sin(xtest) + 1) / 2 + + do n = 0, num_iterations + + call random_number(x) + x = x * 2 * pi + y = (sin(x) + 1) / 2 + + call net % forward(x) + call net % backward(y) + call net % update(1.) + + if (mod(n, 10000) == 0) then + ypred = [(net % output([xtest(i)]), i = 1, test_size)] + print '(i0,1x,f9.6)', n, sum((ypred - ytest)**2) / size(ypred) + end if + + end do + +end program sine diff --git a/fpm.toml b/fpm.toml index a3caa168..c5b32417 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,5 +1,5 @@ name = "neural-fortran" -version = "0.2.0" +version = "0.3.0" license = "MIT" author = "Milan Curcic" maintainer = "milancurcic@hey.com" diff --git a/src/mod_activation.f90 b/src/mod_activation.f90 deleted file mode 100644 index 4aa19317..00000000 --- a/src/mod_activation.f90 +++ /dev/null @@ -1,101 +0,0 @@ -module mod_activation - - !! A collection of activation functions and their derivatives. - - use mod_kinds, only: ik, rk - - implicit none - - private - - public :: activation_function - public :: gaussian, gaussian_prime - public :: relu, relu_prime - public :: sigmoid, sigmoid_prime - public :: step, step_prime - public :: tanhf, tanh_prime - - interface - - pure function activation_function(x) - import :: rk - real(rk), intent(in) :: x(:) - real(rk) :: activation_function(size(x)) - end function activation_function - - pure module function gaussian(x) result(res) - !! Gaussian activation function. 
- implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function gaussian - - pure module function gaussian_prime(x) result(res) - !! First derivative of the Gaussian activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function gaussian_prime - - pure module function relu(x) result(res) - !! REctified Linear Unit (RELU) activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function relu - - pure module function relu_prime(x) result(res) - !! First derivative of the REctified Linear Unit (RELU) activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function relu_prime - - pure module function sigmoid(x) result(res) - !! Sigmoid activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function sigmoid - - pure module function sigmoid_prime(x) result(res) - !! First derivative of the sigmoid activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function sigmoid_prime - - pure module function step(x) result(res) - !! Step activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function step - - pure module function step_prime(x) result(res) - !! First derivative of the step activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function step_prime - - pure module function tanhf(x) result(res) - !! Tangent hyperbolic activation function. - !! Same as the intrinsic tanh, but must be - !! defined here so that we can use procedure - !! pointer with it. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function tanhf - - pure module function tanh_prime(x) result(res) - !! First derivative of the tanh activation function. - implicit none - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - end function tanh_prime - - end interface - -end module mod_activation diff --git a/src/mod_activation_submodule.f90 b/src/mod_activation_submodule.f90 deleted file mode 100644 index c01fa2f9..00000000 --- a/src/mod_activation_submodule.f90 +++ /dev/null @@ -1,77 +0,0 @@ -submodule(mod_activation) mod_activation_submodule - - !! A collection of activation functions and their derivatives. 
- - implicit none - -contains - - pure module function gaussian(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = exp(-x**2) - end function gaussian - - pure module function gaussian_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = -2 * x * gaussian(x) - end function gaussian_prime - - pure module function relu(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = max(0., x) - end function relu - - pure module function relu_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - where (x > 0) - res = 1 - elsewhere - res = 0 - end where - end function relu_prime - - pure module function sigmoid(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = 1 / (1 + exp(-x)) - endfunction sigmoid - - pure module function sigmoid_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = sigmoid(x) * (1 - sigmoid(x)) - end function sigmoid_prime - - pure module function step(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - where (x > 0) - res = 1 - elsewhere - res = 0 - end where - end function step - - pure module function step_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = 0 - end function step_prime - - pure module function tanhf(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = tanh(x) - end function tanhf - - pure module function tanh_prime(x) result(res) - real(rk), intent(in) :: x(:) - real(rk) :: res(size(x)) - res = 1 - tanh(x)**2 - end function tanh_prime - -end submodule mod_activation_submodule diff --git a/src/mod_io.f90 b/src/mod_io.f90 deleted file mode 100644 index 0d40ad57..00000000 --- a/src/mod_io.f90 +++ /dev/null @@ -1,29 +0,0 @@ -module mod_io - - use mod_kinds, only: ik, rk - - implicit none - - private - - public :: read_binary_file - - interface read_binary_file - - module subroutine read_binary_file_1d(filename, dtype, nrec, array) - implicit none - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, nrec - real(rk), allocatable, intent(in out) :: array(:) - end subroutine read_binary_file_1d - - module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) - implicit none - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, dsize, nrec - real(rk), allocatable, intent(in out) :: array(:,:) - end subroutine read_binary_file_2d - - end interface read_binary_file - -end module mod_io diff --git a/src/mod_kinds.f90 b/src/mod_kinds.f90 deleted file mode 100644 index f39b62c7..00000000 --- a/src/mod_kinds.f90 +++ /dev/null @@ -1,24 +0,0 @@ -module mod_kinds - - use iso_fortran_env, only: int32, int64, real32, real64, real128 - - implicit none - - private - public :: ik, rk - -#ifdef REAL64 - integer,parameter :: rk = real64 -#elif REAL128 - integer,parameter :: rk = real128 -#else - integer,parameter :: rk = real32 -#endif - -#ifdef INT64 - integer, parameter :: ik = int64 -#else - integer, parameter :: ik = int32 -#endif - -end module mod_kinds diff --git a/src/mod_layer.f90 b/src/mod_layer.f90 deleted file mode 100644 index e7244241..00000000 --- a/src/mod_layer.f90 +++ /dev/null @@ -1,100 +0,0 @@ -module mod_layer - - !! Defines the layer type and its methods. 
- - use mod_activation - use mod_kinds, only: ik, rk - - implicit none - - private - public :: array1d, array2d, db_init, db_co_sum, dw_init, dw_co_sum, layer_type - - type :: layer_type - real(rk), allocatable :: a(:) !! activations - real(rk), allocatable :: b(:) !! biases - real(rk), allocatable :: w(:,:) !! weights - real(rk), allocatable :: z(:) !! arg. to activation function - procedure(activation_function), pointer, nopass :: activation => null() - procedure(activation_function), pointer, nopass :: activation_prime => null() - character(len=:), allocatable :: activation_str !! activation character string - contains - procedure, public, pass(self) :: set_activation - end type layer_type - - type :: array1d - real(rk), allocatable :: array(:) - end type array1d - - type :: array2d - real(rk), allocatable :: array(:,:) - end type array2d - - interface layer_type - module function constructor(this_size, next_size) result(layer) - !! Layer class constructor. this_size is the number of neurons in the layer. - !! next_size is the number of neurons in the next layer, used to allocate - !! the weights. - implicit none - integer(ik), intent(in) :: this_size, next_size - type(layer_type) layer - end function constructor - end interface layer_type - - interface array1d - pure module function array1d_constructor(length) result(a) - !! Overloads the default type constructor. - implicit none - integer(ik), intent(in) :: length - type(array1d) :: a - end function array1d_constructor - end interface array1d - - interface array2d - pure module function array2d_constructor(dims) result(a) - !! Overloads the default type constructor. - integer(ik), intent(in) :: dims(2) - type(array2d) :: a - end function array2d_constructor - end interface array2d - - interface - - pure module subroutine db_init(db, dims) - !! Initialises biases structure. - implicit none - type(array1d), allocatable, intent(in out) :: db(:) - integer(ik), intent(in) :: dims(:) - end subroutine db_init - - pure module subroutine dw_init(dw, dims) - !! Initialises weights structure. - implicit none - type(array2d), allocatable, intent(in out) :: dw(:) - integer(ik), intent(in) :: dims(:) - end subroutine dw_init - - module subroutine db_co_sum(db) - !! Performs a collective sum of bias tendencies. - implicit none - type(array1d), allocatable, intent(in out) :: db(:) - end subroutine db_co_sum - - module subroutine dw_co_sum(dw) - !! Performs a collective sum of weights tendencies. - implicit none - type(array2d), allocatable, intent(in out) :: dw(:) - end subroutine dw_co_sum - - pure elemental module subroutine set_activation(self, activation) - !! Sets the activation function. Input string must match one of - !! provided activation functions, otherwise it defaults to sigmoid. - !! If activation not present, defaults to sigmoid. 
- implicit none - class(layer_type), intent(in out) :: self - character(len=*), intent(in) :: activation - end subroutine set_activation - - end interface - -end module mod_layer diff --git a/src/mod_layer_submodule.f90 b/src/mod_layer_submodule.f90 deleted file mode 100644 index 514542e0..00000000 --- a/src/mod_layer_submodule.f90 +++ /dev/null @@ -1,106 +0,0 @@ -submodule(mod_layer) mod_layer_submodule - - use mod_random, only: randn - - implicit none - -contains - - module function constructor(this_size, next_size) result(layer) - integer(ik), intent(in) :: this_size, next_size - type(layer_type) :: layer - allocate(layer % a(this_size)) - allocate(layer % z(this_size)) - layer % a = 0 - layer % z = 0 - layer % w = randn(this_size, next_size) / this_size - layer % b = randn(this_size) - end function constructor - - pure module function array1d_constructor(length) result(a) - integer(ik), intent(in) :: length - type(array1d) :: a - allocate(a % array(length)) - a % array = 0 - end function array1d_constructor - - pure module function array2d_constructor(dims) result(a) - integer(ik), intent(in) :: dims(2) - type(array2d) :: a - allocate(a % array(dims(1), dims(2))) - a % array = 0 - end function array2d_constructor - - pure module subroutine db_init(db, dims) - type(array1d), allocatable, intent(in out) :: db(:) - integer(ik), intent(in) :: dims(:) - integer(ik) :: n, nm - nm = size(dims) - allocate(db(nm)) - do n = 1, nm - 1 - db(n) = array1d(dims(n)) - end do - db(n) = array1d(dims(n)) - end subroutine db_init - - pure module subroutine dw_init(dw, dims) - type(array2d), allocatable, intent(in out) :: dw(:) - integer(ik), intent(in) :: dims(:) - integer(ik) :: n, nm - nm = size(dims) - allocate(dw(nm)) - do n = 1, nm - 1 - dw(n) = array2d(dims(n:n+1)) - end do - dw(n) = array2d([dims(n), 1]) - end subroutine dw_init - - module subroutine db_co_sum(db) - type(array1d), allocatable, intent(in out) :: db(:) - integer(ik) :: n - do n = 2, size(db) - call co_sum(db(n) % array) - end do - end subroutine db_co_sum - - module subroutine dw_co_sum(dw) - type(array2d), allocatable, intent(in out) :: dw(:) - integer(ik) :: n - do n = 1, size(dw) - 1 - call co_sum(dw(n) % array) - end do - end subroutine dw_co_sum - - pure elemental module subroutine set_activation(self, activation) - class(layer_type), intent(in out) :: self - character(len=*), intent(in) :: activation - select case(trim(activation)) - case('gaussian') - self % activation => gaussian - self % activation_prime => gaussian_prime - self % activation_str = 'gaussian' - case('relu') - self % activation => relu - self % activation_prime => relu_prime - self % activation_str = 'relu' - case('sigmoid') - self % activation => sigmoid - self % activation_prime => sigmoid_prime - self % activation_str = 'sigmoid' - case('step') - self % activation => step - self % activation_prime => step_prime - self % activation_str = 'step' - case('tanh') - self % activation => tanhf - self % activation_prime => tanh_prime - self % activation_str = 'tanh' - case default - self % activation => sigmoid - self % activation_prime => sigmoid_prime - self % activation_str = 'sigmoid' - end select - end subroutine set_activation - - -end submodule mod_layer_submodule diff --git a/src/mod_mnist.f90 b/src/mod_mnist.f90 deleted file mode 100644 index e8af54cb..00000000 --- a/src/mod_mnist.f90 +++ /dev/null @@ -1,43 +0,0 @@ -module mod_mnist - - !! Procedures to work with MNIST dataset, usable with data format - !! 
as provided in this repo and not the original data format (idx). - - use mod_kinds, only: ik, rk - - implicit none - - private - - public :: label_digits, load_mnist, print_image - - interface - - pure module function label_digits(labels) result(res) - !! Converts an array of MNIST labels into a form - !! that can be input to the network_type instance. - implicit none - real(rk), intent(in) :: labels(:) - real(rk) :: res(10, size(labels)) - end function label_digits - - module subroutine load_mnist(tr_images, tr_labels, te_images,& - - te_labels, va_images, va_labels) - !! Loads the MNIST dataset into arrays. - implicit none - real(rk), allocatable, intent(in out) :: tr_images(:,:), tr_labels(:) - real(rk), allocatable, intent(in out) :: te_images(:,:), te_labels(:) - real(rk), allocatable, intent(in out), optional :: va_images(:,:), va_labels(:) - end subroutine load_mnist - - module subroutine print_image(images, labels, n) - !! Prints a single image and label to screen. - implicit none - real(rk), intent(in) :: images(:,:), labels(:) - integer(ik), intent(in) :: n - end subroutine print_image - - end interface - -end module mod_mnist diff --git a/src/mod_mnist_submodule.f90 b/src/mod_mnist_submodule.f90 deleted file mode 100644 index 9812f197..00000000 --- a/src/mod_mnist_submodule.f90 +++ /dev/null @@ -1,118 +0,0 @@ -submodule(mod_mnist) mod_mnist_submodule - - !! Procedures to work with MNIST dataset, usable with data format - !! as provided in this repo and not the original data format (idx). - - ! TODO make MNIST work with arbitrary precision - - use mod_io, only: read_binary_file - use mod_kinds, only: ik, rk - - implicit none - - integer, parameter :: message_len = 128 - -contains - - subroutine download_and_uncompress() - character(len=*), parameter :: download_mechanism = 'curl -LO ' - character(len=*), parameter :: base_url='https://github.com/modern-fortran/neural-fortran/files/8498876/' - character(len=*), parameter :: download_filename = 'mnist.tar.gz' - character(len=*), parameter :: download_command = download_mechanism // base_url // download_filename - character(len=*), parameter :: uncompress_file = 'tar xvzf ' // download_filename - character(len=message_len) :: command_message - character(len=:), allocatable :: error_message - integer :: exit_status, command_status - - exit_status=0 - call execute_command_line(command=download_command, wait=.true., & - exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) - - if (any([exit_status, command_status] /= 0)) then - error_message = 'command "' // download_command // '" failed' - if (command_status /= 0) error_message = error_message // " with message " // trim(command_message) - error stop error_message - end if - - call execute_command_line(command=uncompress_file, wait=.true., & - exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) - - if (any([exit_status, command_status] /= 0)) then - error_message = 'command "' // uncompress_file // '" failed' - if (command_status /= 0) error_message = error_message // " with message " // trim(command_message) - error stop error_message - end if - - end subroutine download_and_uncompress - - pure module function label_digits(labels) result(res) - real(rk), intent(in) :: labels(:) - real(rk) :: res(10, size(labels)) - integer(ik) :: i - do i = 1, size(labels) - res(:,i) = digits(labels(i)) - end do - contains - pure function digits(x) - !! Returns an array of 10 reals, with zeros everywhere - !! 
and a one corresponding to the input number, for example: - !! digits(0) = [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.] - !! digits(1) = [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.] - !! digits(6) = [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.] - real(rk), intent(in) :: x - real(rk) :: digits(10) - digits = 0 - digits(int(x + 1)) = 1 - end function digits - end function label_digits - - module subroutine load_mnist(tr_images, tr_labels, te_images,& - te_labels, va_images, va_labels) - real(rk), allocatable, intent(in out) :: tr_images(:,:), tr_labels(:) - real(rk), allocatable, intent(in out) :: te_images(:,:), te_labels(:) - real(rk), allocatable, intent(in out), optional :: va_images(:,:), va_labels(:) - integer(ik), parameter :: dtype = 4, image_size = 784 - integer(ik), parameter :: tr_nimages = 50000 - integer(ik), parameter :: te_nimages = 10000 - integer(ik), parameter :: va_nimages = 10000 - logical :: file_exists - - ! Check if MNIST data is present and download it if not. - inquire(file='mnist_training_images.dat', exist=file_exists) - if (.not. file_exists) call download_and_uncompress() - - call read_binary_file('mnist_training_images.dat',& - dtype, image_size, tr_nimages, tr_images) - call read_binary_file('mnist_training_labels.dat',& - dtype, tr_nimages, tr_labels) - - call read_binary_file('mnist_testing_images.dat',& - dtype, image_size, te_nimages, te_images) - call read_binary_file('mnist_testing_labels.dat',& - dtype, te_nimages, te_labels) - - if (present(va_images) .and. present(va_labels)) then - call read_binary_file('mnist_validation_images.dat',& - dtype, image_size, va_nimages, va_images) - call read_binary_file('mnist_validation_labels.dat',& - dtype, va_nimages, va_labels) - end if - - end subroutine load_mnist - - module subroutine print_image(images, labels, n) - real(rk), intent(in) :: images(:,:), labels(:) - integer(ik), intent(in) :: n - real(rk) :: image(28, 28) - character(len=1) :: char_image(28, 28) - integer(ik) i, j - image = reshape(images(:,n), [28, 28]) - char_image = '.' 
- where (image > 0) char_image = '#' - print *, labels(n) - do j = 1, 28 - print *, char_image(:,j) - end do - end subroutine print_image - -end submodule mod_mnist_submodule diff --git a/src/mod_network.f90 b/src/mod_network.f90 deleted file mode 100644 index 7e8d777a..00000000 --- a/src/mod_network.f90 +++ /dev/null @@ -1,204 +0,0 @@ -module mod_network - - use mod_kinds, only: ik, rk - use mod_layer, only: array1d, array2d, layer_type - - implicit none - - private - public :: network_type - - type :: network_type - - type(layer_type), allocatable :: layers(:) - integer(ik), allocatable :: dims(:) - - contains - - procedure, public, pass(self) :: accuracy - procedure, public, pass(self) :: backprop - procedure, public, pass(self) :: fwdprop - procedure, public, pass(self) :: init - procedure, public, pass(self) :: load - procedure, public, pass(self) :: loss - procedure, public, pass(self) :: output_batch - procedure, public, pass(self) :: output_single - procedure, public, pass(self) :: save - procedure, public, pass(self) :: set_activation_equal - procedure, public, pass(self) :: set_activation_layers - procedure, public, pass(self) :: sync - procedure, public, pass(self) :: train_batch - procedure, public, pass(self) :: train_epochs - procedure, public, pass(self) :: train_single - procedure, public, pass(self) :: update - - generic, public :: output => output_batch, output_single - generic, public :: set_activation => set_activation_equal, set_activation_layers - generic, public :: train => train_batch, train_epochs, train_single - - end type network_type - - interface network_type - - module function net_constructor(dims, activation) result(net) - !! Network class constructor. Size of input array dims indicates the total - !! number of layers (input + hidden + output), and the value of its elements - !! corresponds the size of each layer. - implicit none - integer(ik), intent(in) :: dims(:) - character(len=*), intent(in), optional :: activation - type(network_type) :: net - end function net_constructor - - end interface network_type - - interface - - pure real(rk) module function accuracy(self, x, y) - !! Given input x and output y, evaluates the position of the - !! maximum value of the output and returns the number of matches - !! relative to the size of the dataset. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:), y(:,:) - end function accuracy - - pure module subroutine backprop(self, y, dw, db) - !! Applies a backward propagation through the network - !! and returns the weight and bias gradients. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: y(:) - type(array2d), allocatable, intent(out) :: dw(:) - type(array1d), allocatable, intent(out) :: db(:) - end subroutine backprop - - - pure module subroutine fwdprop(self, x) - !! Performs the forward propagation and stores arguments to activation - !! functions and activations themselves for use in backprop. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:) - end subroutine fwdprop - - module subroutine init(self, dims) - !! Allocates and initializes the layers with given dimensions dims. - implicit none - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: dims(:) - end subroutine init - - - module subroutine load(self, filename) - !! Loads the network from file. 
- implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - end subroutine load - - - pure module real(rk) function loss(self, x, y) - !! Given input x and expected output y, returns the loss of the network. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:), y(:) - end function loss - - - pure module function output_single(self, x) result(a) - !! Use forward propagation to compute the output of the network. - !! This specific procedure is for a single sample of 1-d input data. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:) - real(rk), allocatable :: a(:) - end function output_single - - - pure module function output_batch(self, x) result(a) - !! Use forward propagation to compute the output of the network. - !! This specific procedure is for a batch of 1-d input data. - implicit none - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:) - real(rk), allocatable :: a(:,:) - end function output_batch - - module subroutine save(self, filename) - !! Saves the network to a file. - implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - end subroutine save - - - pure module subroutine set_activation_equal(self, activation) - !! A thin wrapper around layer % set_activation(). - !! This method can be used to set an activation function - !! for all layers at once. - implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation - end subroutine set_activation_equal - - - pure module subroutine set_activation_layers(self, activation) - !! A thin wrapper around layer % set_activation(). - !! This method can be used to set different activation functions - !! for each layer separately. - implicit none - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation(size(self % layers)) - end subroutine set_activation_layers - - module subroutine sync(self, image) - !! Broadcasts network weights and biases from - !! specified image to all others. - implicit none - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: image - end subroutine sync - - - module subroutine train_batch(self, x, y, eta) - !! Trains a network using input data x and output data y, - !! and learning rate eta. The learning rate is normalized - !! with the size of the data batch. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:,:), y(:,:), eta - end subroutine train_batch - - - module subroutine train_epochs(self, x, y, eta, num_epochs, batch_size) - !! Trains for num_epochs epochs with mini-bachtes of size equal to batch_size. - implicit none - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: num_epochs, batch_size - real(rk), intent(in) :: x(:,:), y(:,:), eta - end subroutine train_epochs - - - pure module subroutine train_single(self, x, y, eta) - !! Trains a network using a single set of input data x and output data y, - !! and learning rate eta. - implicit none - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:), y(:), eta - end subroutine train_single - - - pure module subroutine update(self, dw, db, eta) - !! Updates network weights and biases with gradients dw and db, - !! scaled by learning rate eta. 
- implicit none - class(network_type), intent(in out) :: self - class(array2d), intent(in) :: dw(:) - class(array1d), intent(in) :: db(:) - real(rk), intent(in) :: eta - end subroutine update - - end interface - -end module mod_network diff --git a/src/mod_network_submodule.f90 b/src/mod_network_submodule.f90 deleted file mode 100644 index 30757661..00000000 --- a/src/mod_network_submodule.f90 +++ /dev/null @@ -1,296 +0,0 @@ -submodule(mod_network) mod_network_submodule - - use mod_kinds, only: ik, rk - use mod_layer, only: db_init, dw_init, db_co_sum, dw_co_sum - use mod_parallel, only: tile_indices - - implicit none - -contains - - module function net_constructor(dims, activation) result(net) - integer(ik), intent(in) :: dims(:) - character(len=*), intent(in), optional :: activation - type(network_type) :: net - call net % init(dims) - if (present(activation)) then - call net % set_activation(activation) - else - call net % set_activation('sigmoid') - end if - call net % sync(1) - end function net_constructor - - pure real(rk) module function accuracy(self, x, y) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:), y(:,:) - integer(ik) :: i, good - good = 0 - do i = 1, size(x, dim=2) - if (all(maxloc(self % output(x(:,i))) == maxloc(y(:,i)))) then - good = good + 1 - end if - end do - accuracy = real(good, kind=rk) / size(x, dim=2) - end function accuracy - - - pure module subroutine backprop(self, y, dw, db) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: y(:) - type(array2d), allocatable, intent(out) :: dw(:) - type(array1d), allocatable, intent(out) :: db(:) - integer(ik) :: n, nm - - associate(dims => self % dims, layers => self % layers) - - call db_init(db, dims) - call dw_init(dw, dims) - - n = size(dims) - db(n) % array = (layers(n) % a - y) * self % layers(n) % activation_prime(layers(n) % z) - dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),& - reshape(db(n) % array, [1, dims(n)])) - - do n = size(dims) - 1, 2, -1 - db(n) % array = matmul(layers(n) % w, db(n+1) % array)& - * self % layers(n) % activation_prime(layers(n) % z) - dw(n-1) % array = matmul(reshape(layers(n-1) % a, [dims(n-1), 1]),& - reshape(db(n) % array, [1, dims(n)])) - end do - - end associate - - end subroutine backprop - - - pure module subroutine fwdprop(self, x) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:) - integer(ik) :: n - associate(layers => self % layers) - layers(1) % a = x - do n = 2, size(layers) - layers(n) % z = matmul(transpose(layers(n-1) % w), layers(n-1) % a) + layers(n) % b - layers(n) % a = self % layers(n) % activation(layers(n) % z) - end do - end associate - end subroutine fwdprop - - - module subroutine init(self, dims) - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: dims(:) - integer(ik) :: n - self % dims = dims - if (.not. allocated(self % layers)) allocate(self % layers(size(dims))) - do n = 1, size(dims) - 1 - self % layers(n) = layer_type(dims(n), dims(n+1)) - end do - self % layers(n) = layer_type(dims(n), 1) - self % layers(1) % b = 0 - self % layers(size(dims)) % w = 0 - end subroutine init - - - module subroutine load(self, filename) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - integer(ik) :: fileunit, n, num_layers, layer_idx - integer(ik), allocatable :: dims(:) - character(len=100) :: buffer !! 
activation string - open(newunit=fileunit, file=filename, status='old', action='read') - read(fileunit, *) num_layers - allocate(dims(num_layers)) - read(fileunit, *) dims - call self % init(dims) - do n = 1, num_layers - read(fileunit, *) layer_idx, buffer - call self % layers(layer_idx) % set_activation(trim(buffer)) - end do - do n = 2, size(self % dims) - read(fileunit, *) self % layers(n) % b - end do - do n = 1, size(self % dims) - 1 - read(fileunit, *) self % layers(n) % w - end do - close(fileunit) - end subroutine load - - - pure real(rk) module function loss(self, x, y) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:), y(:) - loss = 0.5 * sum((y - self % output(x))**2) / size(x) - end function loss - - - pure module function output_single(self, x) result(a) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:) - real(rk), allocatable :: a(:) - integer(ik) :: n - associate(layers => self % layers) - a = self % layers(2) % activation(matmul(transpose(layers(1) % w), x) + layers(2) % b) - do n = 3, size(layers) - a = self % layers(n) % activation(matmul(transpose(layers(n-1) % w), a) + layers(n) % b) - end do - end associate - end function output_single - - - pure module function output_batch(self, x) result(a) - class(network_type), intent(in) :: self - real(rk), intent(in) :: x(:,:) - real(rk), allocatable :: a(:,:) - integer(ik) :: i - allocate(a(self % dims(size(self % dims)), size(x, dim=2))) - do i = 1, size(x, dim=2) - a(:,i) = self % output_single(x(:,i)) - end do - end function output_batch - - - module subroutine save(self, filename) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: filename - integer(ik) :: fileunit, n - open(newunit=fileunit, file=filename) - write(fileunit, fmt=*) size(self % dims) - write(fileunit, fmt=*) self % dims - do n = 1, size(self % dims) - write(fileunit, fmt=*) n, self % layers(n) % activation_str - end do - do n = 2, size(self % dims) - write(fileunit, fmt=*) self % layers(n) % b - end do - do n = 1, size(self % dims) - 1 - write(fileunit, fmt=*) self % layers(n) % w - end do - close(fileunit) - end subroutine save - - - pure module subroutine set_activation_equal(self, activation) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation - call self % layers(:) % set_activation(activation) - end subroutine set_activation_equal - - - pure module subroutine set_activation_layers(self, activation) - class(network_type), intent(in out) :: self - character(len=*), intent(in) :: activation(size(self % layers)) - call self % layers(:) % set_activation(activation) - end subroutine set_activation_layers - - module subroutine sync(self, image) - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: image - integer(ik) :: n - if (num_images() == 1) return - layers: do n = 1, size(self % dims) - call co_broadcast(self % layers(n) % b, image) - call co_broadcast(self % layers(n) % w, image) - end do layers - end subroutine sync - - module subroutine train_batch(self, x, y, eta) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:,:), y(:,:), eta - type(array1d), allocatable :: db(:), db_batch(:) - type(array2d), allocatable :: dw(:), dw_batch(:) - integer(ik) :: i, im, n, nm - integer(ik) :: is, ie, indices(2) - - im = size(x, dim=2) ! mini-batch size - nm = size(self % dims) ! number of layers - - ! 
get start and end index for mini-batch - indices = tile_indices(im) - is = indices(1) - ie = indices(2) - - call db_init(db_batch, self % dims) - call dw_init(dw_batch, self % dims) - - do concurrent(i = is:ie) - call self % fwdprop(x(:,i)) - call self % backprop(y(:,i), dw, db) - do concurrent(n = 1:nm) - dw_batch(n) % array = dw_batch(n) % array + dw(n) % array - db_batch(n) % array = db_batch(n) % array + db(n) % array - end do - end do - - if (num_images() > 1) then - call dw_co_sum(dw_batch) - call db_co_sum(db_batch) - end if - - call self % update(dw_batch, db_batch, eta / im) - - end subroutine train_batch - - module subroutine train_epochs(self, x, y, eta, num_epochs, batch_size) - class(network_type), intent(in out) :: self - integer(ik), intent(in) :: num_epochs, batch_size - real(rk), intent(in) :: x(:,:), y(:,:), eta - - integer(ik) :: i, n, nsamples, nbatch - integer(ik) :: batch_start, batch_end - - real(rk) :: pos - - nsamples = size(y, dim=2) - nbatch = nsamples / batch_size - - epochs: do n = 1, num_epochs - batches: do i = 1, nbatch - - !pull a random mini-batch from the dataset - call random_number(pos) - batch_start = int(pos * (nsamples - batch_size + 1)) - if (batch_start == 0) batch_start = 1 - batch_end = batch_start + batch_size - 1 - - call self % train(x(:,batch_start:batch_end), y(:,batch_start:batch_end), eta) - - end do batches - end do epochs - - end subroutine train_epochs - - - pure module subroutine train_single(self, x, y, eta) - class(network_type), intent(in out) :: self - real(rk), intent(in) :: x(:), y(:), eta - type(array2d), allocatable :: dw(:) - type(array1d), allocatable :: db(:) - call self % fwdprop(x) - call self % backprop(y, dw, db) - call self % update(dw, db, eta) - end subroutine train_single - - - pure module subroutine update(self, dw, db, eta) - class(network_type), intent(in out) :: self - class(array2d), intent(in) :: dw(:) - class(array1d), intent(in) :: db(:) - real(rk), intent(in) :: eta - integer(ik) :: n - - associate(layers => self % layers, nm => size(self % dims)) - ! update biases - do concurrent(n = 2:nm) - layers(n) % b = layers(n) % b - eta * db(n) % array - end do - ! 
update weights - do concurrent(n = 1:nm-1) - layers(n) % w = layers(n) % w - eta * dw(n) % array - end do - end associate - - end subroutine update - -end submodule mod_network_submodule diff --git a/src/mod_random_submodule.f90 b/src/mod_random_submodule.f90 deleted file mode 100644 index c75e4e7c..00000000 --- a/src/mod_random_submodule.f90 +++ /dev/null @@ -1,24 +0,0 @@ -submodule(mod_random) mod_random_submodule - implicit none - - real(rk), parameter :: pi = 4 * atan(1._rk) - -contains - - module function randn1d(n) result(r) - integer(ik), intent(in) :: n - real(rk) :: r(n), r2(n) - call random_number(r) - call random_number(r2) - r = sqrt(-2 * log(r)) * cos(2 * pi * r2) - end function randn1d - - module function randn2d(m, n) result(r) - integer(ik), intent(in) :: m, n - real(rk) :: r(m, n), r2(m, n) - call random_number(r) - call random_number(r2) - r = sqrt(-2 * log(r)) * cos(2 * pi * r2) - end function randn2d - -end submodule mod_random_submodule diff --git a/src/nf.f90 b/src/nf.f90 new file mode 100644 index 00000000..ca71a2c5 --- /dev/null +++ b/src/nf.f90 @@ -0,0 +1,6 @@ +module nf + use nf_datasets_mnist, only: label_digits, load_mnist + use nf_layer, only: layer + use nf_layer_constructors, only: dense, input + use nf_network, only: network +end module nf diff --git a/src/nf_activation.f90 b/src/nf_activation.f90 new file mode 100644 index 00000000..50f9362c --- /dev/null +++ b/src/nf_activation.f90 @@ -0,0 +1,155 @@ +module nf_activation + + ! A collection of activation functions and their derivatives. + + implicit none + + private + + public :: activation_function + public :: elu, elu_prime + public :: exponential + public :: gaussian, gaussian_prime + public :: relu, relu_prime + public :: sigmoid, sigmoid_prime + public :: softplus, softplus_prime + public :: step, step_prime + public :: tanhf, tanh_prime + + interface + pure function activation_function(x) + real, intent(in) :: x(:) + real :: activation_function(size(x)) + end function activation_function + end interface + +contains + + pure function elu(x, alpha) result(res) + ! Exponential Linear Unit (ELU) activation function. + real, intent(in) :: x(:) + real, intent(in) :: alpha + real :: res(size(x)) + where (x >= 0) + res = x + elsewhere + res = alpha * (exp(x) - 1) + end where + end function elu + + pure function elu_prime(x, alpha) result(res) + ! First derivative of the Exponential Linear Unit (ELU) + ! activation function. + real, intent(in) :: x(:) + real, intent(in) :: alpha + real :: res(size(x)) + where (x >= 0) + res = 1 + elsewhere + res = alpha * exp(x) + end where + end function elu_prime + + pure function exponential(x) result(res) + ! Exponential activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = exp(x) + end function exponential + + pure function gaussian(x) result(res) + ! Gaussian activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = exp(-x**2) + end function gaussian + + pure function gaussian_prime(x) result(res) + ! First derivative of the Gaussian activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = -2 * x * gaussian(x) + end function gaussian_prime + + pure function relu(x) result(res) + !! Rectified Linear Unit (ReLU) activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = max(0., x) + end function relu + + pure function relu_prime(x) result(res) + ! First derivative of the Rectified Linear Unit (ReLU) activation function. 
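+    ! Returns 1 where x > 0 and 0 elsewhere; the derivative at x = 0 is
+    ! taken to be 0 here.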
+ real, intent(in) :: x(:) + real :: res(size(x)) + where (x > 0) + res = 1 + elsewhere + res = 0 + end where + end function relu_prime + + pure function sigmoid(x) result(res) + ! Sigmoid activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = 1 / (1 + exp(-x)) + endfunction sigmoid + + pure function sigmoid_prime(x) result(res) + ! First derivative of the sigmoid activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = sigmoid(x) * (1 - sigmoid(x)) + end function sigmoid_prime + + pure function softplus(x) result(res) + ! Softplus activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = log(exp(x) + 1) + end function softplus + + pure function softplus_prime(x) result(res) + ! First derivative of the softplus activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = exp(x) / (exp(x) + 1) + end function softplus_prime + + pure function step(x) result(res) + ! Step activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + where (x > 0) + res = 1 + elsewhere + res = 0 + end where + end function step + + pure function step_prime(x) result(res) + ! First derivative of the step activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = 0 + end function step_prime + + pure function tanhf(x) result(res) + ! Tangent hyperbolic activation function. + ! Same as the intrinsic tanh, but must be + ! defined here so that we can use procedure + ! pointer with it. + real, intent(in) :: x(:) + real :: res(size(x)) + res = tanh(x) + end function tanhf + + pure function tanh_prime(x) result(res) + ! First derivative of the tanh activation function. + real, intent(in) :: x(:) + real :: res(size(x)) + res = 1 - tanh(x)**2 + end function tanh_prime + +end module nf_activation diff --git a/src/nf_base_layer.f90 b/src/nf_base_layer.f90 new file mode 100644 index 00000000..9a5ae1d9 --- /dev/null +++ b/src/nf_base_layer.f90 @@ -0,0 +1,53 @@ +module nf_base_layer + + !! This module provides the abstract base layer type, to be extended by + !! specific concrete types. + + use nf_activation, only: activation_function + + implicit none + + private + public :: base_layer + + type, abstract :: base_layer + + !! This type is the base for creating concrete layer instances. + !! Extend this type when creating other concrete layer types. + + character(:), allocatable :: activation_name + procedure(activation_function), pointer, nopass :: & + activation => null() + procedure(activation_function), pointer, nopass :: & + activation_prime => null() + + contains + + procedure(init_interface), deferred :: init + procedure :: set_activation + + end type base_layer + + abstract interface + subroutine init_interface(self, input_shape) + !! Initialize the internal layer data structures. + import :: base_layer + class(base_layer), intent(in out) :: self + !! Layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer, i.e. the layer that precedes + !! this layer + end subroutine init_interface + end interface + + interface + elemental module subroutine set_activation(self, activation) + !! Set the activation functions. + class(base_layer), intent(in out) :: self + !! Layer instance + character(*), intent(in) :: activation + !! 
String with the activation function name + end subroutine set_activation + end interface + +end module nf_base_layer diff --git a/src/nf_base_layer_submodule.f90 b/src/nf_base_layer_submodule.f90 new file mode 100644 index 00000000..99f1532c --- /dev/null +++ b/src/nf_base_layer_submodule.f90 @@ -0,0 +1,73 @@ +submodule(nf_base_layer) nf_base_layer_submodule + + use nf_activation, only: activation_function, & + elu, elu_prime, & + exponential, & + gaussian, gaussian_prime, & + relu, relu_prime, & + sigmoid, sigmoid_prime, & + softplus, softplus_prime, & + step, step_prime, & + tanhf, tanh_prime + + implicit none + +contains + + elemental module subroutine set_activation(self, activation) + class(base_layer), intent(in out) :: self + character(*), intent(in) :: activation + + select case(trim(activation)) + + ! TODO need to figure out how to handle the alpha param + !case('elu') + ! self % activation => elu + ! self % activation_prime => elu_prime + ! self % activation_name = 'elu' + + case('exponential') + self % activation => exponential + self % activation_prime => exponential + self % activation_name = 'exponential' + + case('gaussian') + self % activation => gaussian + self % activation_prime => gaussian_prime + self % activation_name = 'gaussian' + + case('relu') + self % activation => relu + self % activation_prime => relu_prime + self % activation_name = 'relu' + + case('sigmoid') + self % activation => sigmoid + self % activation_prime => sigmoid_prime + self % activation_name = 'sigmoid' + + case('softplus') + self % activation => softplus + self % activation_prime => softplus_prime + self % activation_name = 'softplus' + + case('step') + self % activation => step + self % activation_prime => step_prime + self % activation_name = 'step' + + case('tanh') + self % activation => tanhf + self % activation_prime => tanh_prime + self % activation_name = 'tanh' + + case default + error stop 'Activation must be one of: ' // & + '"elu", "exponential", "gaussian", "relu", ' // & + '"sigmoid", "softplus", "step", or "tanh".' + + end select + + end subroutine set_activation + +end submodule nf_base_layer_submodule diff --git a/src/nf_conv2d_layer.f90 b/src/nf_conv2d_layer.f90 new file mode 100644 index 00000000..455a811a --- /dev/null +++ b/src/nf_conv2d_layer.f90 @@ -0,0 +1,84 @@ +module nf_conv2d_layer + + !! This is a placeholder module that will later define a concrete conv2d + !! layer type. + + use nf_base_layer, only: base_layer + implicit none + + private + public :: conv2d_layer + + type, extends(base_layer) :: conv2d_layer + + integer :: width + integer :: height + integer :: channels + integer :: window_size + integer :: filters + + real, allocatable :: biases(:) ! 
as many as there are filters + real, allocatable :: kernel(:,:,:,:) + real, allocatable :: output(:,:,:) + + contains + + procedure :: init + procedure :: forward + procedure :: backward + + end type conv2d_layer + + interface conv2d_layer + module procedure :: conv2d_layer_cons + end interface conv2d_layer + +contains + + pure function conv2d_layer_cons(window_size, filters, activation) result(res) + integer, intent(in) :: window_size + integer, intent(in) :: filters + character(*), intent(in) :: activation + type(conv2d_layer) :: res + res % window_size = window_size + res % filters = filters + call res % set_activation(activation) + end function conv2d_layer_cons + + + subroutine init(self, input_shape) + class(conv2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % width = input_shape(1) - self % window_size + 1 + self % height = input_shape(2) - self % window_size + 1 + self % channels = input_shape(3) + + allocate(self % output(self % width, self % height, self % filters)) + self % output = 0 + + allocate(self % kernel(self % window_size, self % window_size, & + self % channels, self % filters)) + self % kernel = 0 ! TODO 4-d randn + + allocate(self % biases(self % filters)) + self % biases = 0 + + end subroutine init + + + subroutine forward(self, input) + class(conv2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:,:) + print *, 'Warning: conv2d forward pass not implemented' + end subroutine forward + + + subroutine backward(self, input, gradient) + class(conv2d_layer), intent(in out) :: self + real, intent(in) :: input(:,:,:) + real, intent(in) :: gradient(:,:,:) + print *, 'Warning: conv2d backward pass not implemented' + end subroutine backward + +end module nf_conv2d_layer diff --git a/src/nf_datasets_mnist.f90 b/src/nf_datasets_mnist.f90 new file mode 100644 index 00000000..574679fa --- /dev/null +++ b/src/nf_datasets_mnist.f90 @@ -0,0 +1,47 @@ +module nf_datasets_mnist + + !! Procedures to work with MNIST dataset, usable with data format + !! as provided in this repo and not the original data format (idx). + + implicit none + + private + public :: label_digits, load_mnist, print_image + + interface + + pure module function label_digits(labels) result(res) + !! Converts an array of individual MNIST labels (e.g. 3) + !! into a form that can be used to evaluate against dense layer output, + !! e.g. [0, 0, 0, 1, 0, 0, 0, 0, 0]. + implicit none + real, intent(in) :: labels(:) + !! Array of labels with single digit values in the range 0-9 + real :: res(10, size(labels)) + !! 10-element array of zeros and a single one indicating the digit + end function label_digits + + module subroutine load_mnist(training_images, training_labels, & + validation_images, validation_labels, & + testing_images, testing_labels) + !! Loads the MNIST dataset into arrays. + implicit none + real, allocatable, intent(in out) :: training_images(:,:) + real, allocatable, intent(in out) :: training_labels(:) + real, allocatable, intent(in out) :: validation_images(:,:) + real, allocatable, intent(in out) :: validation_labels(:) + real, allocatable, intent(in out), optional :: testing_images(:,:) + real, allocatable, intent(in out), optional :: testing_labels(:) + end subroutine load_mnist + + module subroutine print_image(images, labels, n) + !! Print a single image and label to the screen. 
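+      !! The n-th image is reshaped to 28 x 28 and printed as text, with
+      !! '#' marking bright pixels and '.' marking dark ones.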
+ implicit none + real, intent(in) :: images(:,:) + real, intent(in) :: labels(:) + integer, intent(in) :: n + end subroutine print_image + + end interface + +end module nf_datasets_mnist diff --git a/src/nf_datasets_mnist_submodule.f90 b/src/nf_datasets_mnist_submodule.f90 new file mode 100644 index 00000000..e1ff0dfa --- /dev/null +++ b/src/nf_datasets_mnist_submodule.f90 @@ -0,0 +1,132 @@ +submodule(nf_datasets_mnist) nf_datasets_mnist_submodule + + use nf_io, only: read_binary_file + + implicit none + + integer, parameter :: message_len = 128 + +contains + + subroutine download_and_uncompress() + character(*), parameter :: download_mechanism = 'curl -LO ' + character(*), parameter :: base_url='https://github.com/modern-fortran/neural-fortran/files/8498876/' + character(*), parameter :: download_filename = 'mnist.tar.gz' + character(*), parameter :: download_command = download_mechanism // base_url // download_filename + character(*), parameter :: uncompress_file = 'tar xvzf ' // download_filename + character(message_len) :: command_message + character(:), allocatable :: error_message + integer :: exit_status, command_status + + exit_status=0 + call execute_command_line(command=download_command, wait=.true., & + exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) + + if (any([exit_status, command_status] /= 0)) then + error_message = 'command "' // download_command // '" failed' + if (command_status /= 0) error_message = error_message // " with message " // trim(command_message) + error stop error_message + end if + + call execute_command_line(command=uncompress_file, wait=.true., & + exitstat=exit_status, cmdstat=command_status, cmdmsg=command_message) + + if (any([exit_status, command_status] /= 0)) then + error_message = 'command "' // uncompress_file // '" failed' + if (command_status /= 0) error_message = error_message // " with message " // trim(command_message) + error stop error_message + end if + + end subroutine download_and_uncompress + + + pure module function label_digits(labels) result(res) + real, intent(in) :: labels(:) + real :: res(10, size(labels)) + integer :: i + do i = 1, size(labels) + res(:,i) = digits(labels(i)) + end do + contains + pure function digits(x) + !! Returns an array of 10 reals, with zeros everywhere + !! and a one corresponding to the input digit. + !! + !! Example + !! + !! ``` + !! digits(0) = [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.] + !! digits(1) = [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.] + !! digits(6) = [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.] + !! ``` + real, intent(in) :: x + !! Input digit (0-9) + real :: digits(10) + !! 10-element array of zeros with a single one + !! 
indicating the input digit + digits = 0 + digits(int(x + 1)) = 1 + end function digits + end function label_digits + + + module subroutine load_mnist(training_images, training_labels, & + validation_images, validation_labels, & + testing_images, testing_labels) + real, allocatable, intent(in out) :: training_images(:,:) + real, allocatable, intent(in out) :: training_labels(:) + real, allocatable, intent(in out) :: validation_images(:,:) + real, allocatable, intent(in out) :: validation_labels(:) + real, allocatable, intent(in out), optional :: testing_images(:,:) + real, allocatable, intent(in out), optional :: testing_labels(:) + + integer, parameter :: dtype = 4, image_size = 784 + integer, parameter :: num_training_images = 50000 + integer, parameter :: num_validation_images = 10000 + integer, parameter :: num_testing_images = 10000 + logical :: file_exists + + ! Check if MNIST data is present and download it if not. + inquire(file='mnist_training_images.dat', exist=file_exists) + if (.not. file_exists) call download_and_uncompress() + + ! Load the training dataset (50000 samples) + call read_binary_file('mnist_training_images.dat', & + dtype, image_size, num_training_images, training_images) + call read_binary_file('mnist_training_labels.dat', & + dtype, num_training_images, training_labels) + + ! Load the validation dataset (10000 samples), for use while training + call read_binary_file('mnist_validation_images.dat', & + dtype, image_size, num_validation_images, validation_images) + call read_binary_file('mnist_validation_labels.dat', & + dtype, num_validation_images, validation_labels) + + ! Load the testing dataset (10000 samples), to test after training + if (present(testing_images) .and. present(testing_labels)) then + call read_binary_file('mnist_testing_images.dat', & + dtype, image_size, num_testing_images, testing_images) + call read_binary_file('mnist_testing_labels.dat', & + dtype, num_testing_images, testing_labels) + end if + + end subroutine load_mnist + + + module subroutine print_image(images, labels, n) + real, intent(in) :: images(:,:) + real, intent(in) :: labels(:) + integer, intent(in) :: n + real :: image(28, 28) + character :: char_image(28, 28) + integer i, j + image = reshape(images(:,n), [28, 28]) + char_image = '.' + where (image > 0) char_image = '#' + print *, labels(n) + do j = 1, 28 + print *, char_image(:,j) + end do + end subroutine print_image + +end submodule nf_datasets_mnist_submodule diff --git a/src/nf_dense_layer.f90 b/src/nf_dense_layer.f90 new file mode 100644 index 00000000..2c31898e --- /dev/null +++ b/src/nf_dense_layer.f90 @@ -0,0 +1,97 @@ +module nf_dense_layer + + !! This module provides the concrete dense layer type. + !! It is used internally by the layer type. + !! It is not intended to be used directly by the user. + + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + + implicit none + + private + public :: dense_layer + + type, extends(base_layer) :: dense_layer + + !! Concrete implementation of a dense (fully-connected) layer type + + integer :: input_size + integer :: output_size + + real, allocatable :: weights(:,:) + real, allocatable :: biases(:) + real, allocatable :: z(:) ! matmul(x, w) + b + real, allocatable :: output(:) ! activation(z) + real, allocatable :: gradient(:) ! matmul(w, db) + real, allocatable :: dw(:,:) ! weight gradients + real, allocatable :: db(:) ! 
bias gradients + + contains + + procedure :: backward + procedure :: forward + procedure :: init + procedure :: update + + end type dense_layer + + interface dense_layer + elemental module function dense_layer_cons(output_size, activation) & + result(res) + !! This function returns the `dense_layer` instance. + integer, intent(in) :: output_size + !! Number of neurons in this layer + character(*), intent(in) :: activation + !! Name of the activation function to use; + !! See nf_activation.f90 for available functions. + type(dense_layer) :: res + !! dense_layer instance + end function dense_layer_cons + end interface dense_layer + + interface + + pure module subroutine backward(self, input, gradient) + !! Apply the backward gradient descent pass. + !! Only weight and bias gradients are updated in this subroutine, + !! while the weights and biases themselves are untouched. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + real, intent(in) :: gradient(:) + !! Gradient from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Propagate forward the layer. + !! Calling this subroutine updates the values of a few data components + !! of `dense_layer` that are needed for the backward pass. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: input(:) + !! Input from the previous layer + end subroutine forward + + module subroutine init(self, input_shape) + !! Initialize the layer data structures. + !! + !! This is a deferred procedure from the `base_layer` abstract type. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + integer, intent(in) :: input_shape(:) + !! Shape of the input layer + end subroutine init + + module subroutine update(self, learning_rate) + !! Update the weights and biases. + class(dense_layer), intent(in out) :: self + !! Dense layer instance + real, intent(in) :: learning_rate + !! 
Learning rate (must be > 0) + end subroutine update + + end interface + +end module nf_dense_layer diff --git a/src/nf_dense_layer_submodule.f90 b/src/nf_dense_layer_submodule.f90 new file mode 100644 index 00000000..543b86cd --- /dev/null +++ b/src/nf_dense_layer_submodule.f90 @@ -0,0 +1,97 @@ +submodule(nf_dense_layer) nf_dense_layer_submodule + + use nf_base_layer, only: base_layer + use nf_random, only: randn + + implicit none + +contains + + elemental module function dense_layer_cons(output_size, activation) & + result(res) + integer, intent(in) :: output_size + character(*), intent(in) :: activation + type(dense_layer) :: res + res % output_size = output_size + call res % set_activation(activation) + end function dense_layer_cons + + + pure module subroutine backward(self, input, gradient) + class(dense_layer), intent(in out) :: self + real, intent(in) :: input(:) + real, intent(in) :: gradient(:) + real :: db(self % output_size) + real :: dw(self % input_size, self % output_size) + + db = gradient * self % activation_prime(self % z) + dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)])) + self % gradient = matmul(self % weights, db) + self % dw = self % dw + dw + self % db = self % db + db + + end subroutine backward + + + pure module subroutine forward(self, input) + class(dense_layer), intent(in out) :: self + real, intent(in) :: input(:) + + self % z = matmul(input, self % weights) + self % biases + self % output = self % activation(self % z) + + end subroutine forward + + + module subroutine init(self, input_shape) + class(dense_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + self % input_size = input_shape(1) + + ! Weights are a 2-d array of shape previous layer size + ! times this layer size. + allocate(self % weights(self % input_size, self % output_size)) + self % weights = randn(self % input_size, self % output_size) & + / self % input_size + + ! Broadcast weights to all other images, if any. + call co_broadcast(self % weights, 1) + + allocate(self % biases(self % output_size)) + self % biases = 0 + + allocate(self % output(self % output_size)) + self % output = 0 + + allocate(self % z(self % output_size)) + self % z = 0 + + allocate(self % dw(self % input_size, self % output_size)) + self % dw = 0 + + allocate(self % db(self % output_size)) + self % db = 0 + + allocate(self % gradient(self % output_size)) + self % gradient = 0 + + end subroutine init + + + module subroutine update(self, learning_rate) + class(dense_layer), intent(in out) :: self + real, intent(in) :: learning_rate + + ! Sum weight and bias gradients across images, if any + call co_sum(self % dw) + call co_sum(self % db) + + self % weights = self % weights - learning_rate * self % dw + self % biases = self % biases - learning_rate * self % db + self % dw = 0 + self % db = 0 + + end subroutine update + +end submodule nf_dense_layer_submodule diff --git a/src/nf_input1d_layer.f90 b/src/nf_input1d_layer.f90 new file mode 100644 index 00000000..ae3be12e --- /dev/null +++ b/src/nf_input1d_layer.f90 @@ -0,0 +1,50 @@ +module nf_input1d_layer + + !! This module provides the `input1d_layer` type. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: input1d_layer + + type, extends(base_layer) :: input1d_layer + real, allocatable :: output(:) + contains + procedure :: init + procedure :: set + end type input1d_layer + + interface input1d_layer + pure module function input1d_layer_cons(output_size) result(res) + !! 
Create a new instance of the 1-d input layer. + !! Only used internally by the `layer % init` method. + integer, intent(in) :: output_size + !! Size of the input layer + type(input1d_layer) :: res + !! 1-d input layer instance + end function input1d_layer_cons + end interface input1d_layer + + interface + + module subroutine init(self, input_shape) + !! Only here to satisfy the language rules + !! about deferred methods of abstract types. + !! This method does nothing for this type and should not be called. + class(input1d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + !! Set values on this layer. + class(input1d_layer), intent(in out) :: self + !! Layer instance + real, intent(in) :: values(:) + !! Values to set + end subroutine set + + end interface + +end module nf_input1d_layer diff --git a/src/nf_input1d_layer_submodule.f90 b/src/nf_input1d_layer_submodule.f90 new file mode 100644 index 00000000..8fa8c49b --- /dev/null +++ b/src/nf_input1d_layer_submodule.f90 @@ -0,0 +1,23 @@ +submodule(nf_input1d_layer) nf_input1d_layer_submodule + implicit none +contains + + pure module function input1d_layer_cons(output_size) result(res) + integer, intent(in) :: output_size + type(input1d_layer) :: res + allocate(res % output(output_size)) + res % output = 0 + end function input1d_layer_cons + + module subroutine init(self, input_shape) + class(input1d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + class(input1d_layer), intent(in out) :: self + real, intent(in) :: values(:) + self % output = values + end subroutine set + +end submodule nf_input1d_layer_submodule diff --git a/src/nf_input3d_layer.f90 b/src/nf_input3d_layer.f90 new file mode 100644 index 00000000..511fa926 --- /dev/null +++ b/src/nf_input3d_layer.f90 @@ -0,0 +1,48 @@ +module nf_input3d_layer + + !! This module provides the `input3d_layer` type. + + use nf_base_layer, only: base_layer + implicit none + + private + public :: input3d_layer + + type, extends(base_layer) :: input3d_layer + real, allocatable :: output(:,:,:) + contains + procedure :: init + procedure :: set + end type input3d_layer + + interface input3d_layer + pure module function input3d_layer_cons(output_shape) result(res) + !! Create a new instance of the 3-d input layer. + !! Only used internally by the `layer % init` method. + integer, intent(in) :: output_shape(3) + !! Shape of the input layer + type(input3d_layer) :: res + !! 3-d input layer instance + end function input3d_layer_cons + end interface input3d_layer + + interface + + module subroutine init(self, input_shape) + !! Only here to satisfy the language rules + !! about deferred methods of abstract types. + !! This method does nothing for this type and should not be called. + class(input3d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + class(input3d_layer), intent(in out) :: self + !! Layer instance + real, intent(in) :: values(:,:,:) + !! 
Values to set + end subroutine set + + end interface + +end module nf_input3d_layer diff --git a/src/nf_input3d_layer_submodule.f90 b/src/nf_input3d_layer_submodule.f90 new file mode 100644 index 00000000..4cfe5126 --- /dev/null +++ b/src/nf_input3d_layer_submodule.f90 @@ -0,0 +1,23 @@ +submodule(nf_input3d_layer) nf_input3d_layer_submodule + implicit none +contains + + pure module function input3d_layer_cons(output_shape) result(res) + integer, intent(in) :: output_shape(3) + type(input3d_layer) :: res + allocate(res % output(output_shape(1), output_shape(2), output_shape(3))) + res % output = 0 + end function input3d_layer_cons + + module subroutine init(self, input_shape) + class(input3d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine set(self, values) + class(input3d_layer), intent(in out) :: self + real, intent(in) :: values(:,:,:) + self % output = values + end subroutine set + +end submodule nf_input3d_layer_submodule diff --git a/src/nf_io.f90 b/src/nf_io.f90 new file mode 100644 index 00000000..c4f898f1 --- /dev/null +++ b/src/nf_io.f90 @@ -0,0 +1,42 @@ +module nf_io + + !! This module provides subroutines to read binary files using direct access. + + implicit none + + private + public :: read_binary_file + + interface read_binary_file + + module subroutine read_binary_file_1d(filename, dtype, nrec, array) + !! Read a binary file into a 1-d real array using direct access. + implicit none + character(*), intent(in) :: filename + !! Path to the file to read + integer, intent(in) :: dtype + !! Number of bytes per element + integer, intent(in) :: nrec + !! Number of records to read + real, allocatable, intent(in out) :: array(:) + !! Array to store the data in + end subroutine read_binary_file_1d + + module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) + !! Read a binary file into a 2-d real array using direct access. + implicit none + character(*), intent(in) :: filename + !! Path to the file to read + integer, intent(in) :: dtype + !! Number of bytes per element + integer, intent(in) :: dsize + !! Number of elements in a record + integer, intent(in) :: nrec + !! Number of records to read + real, allocatable, intent(in out) :: array(:,:) + !! 
Array to store the data in + end subroutine read_binary_file_2d + + end interface read_binary_file + +end module nf_io diff --git a/src/mod_io_submodule.f90 b/src/nf_io_submodule.f90 similarity index 66% rename from src/mod_io_submodule.f90 rename to src/nf_io_submodule.f90 index 89a69f5f..2ac35ba3 100644 --- a/src/mod_io_submodule.f90 +++ b/src/nf_io_submodule.f90 @@ -1,4 +1,4 @@ -submodule(mod_io) mod_io_submodule +submodule(nf_io) nf_io_submodule implicit none @@ -7,11 +7,11 @@ contains module subroutine read_binary_file_1d(filename, dtype, nrec, array) - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, nrec - real(rk), allocatable, intent(in out) :: array(:) - integer(ik) :: fileunit - character(len=message_len) :: io_message + character(*), intent(in) :: filename + integer, intent(in) :: dtype, nrec + real, allocatable, intent(in out) :: array(:) + integer :: fileunit + character(message_len) :: io_message integer :: io_status io_status = 0 open(newunit=fileunit, file=filename, access='direct', action='read', & @@ -23,11 +23,11 @@ module subroutine read_binary_file_1d(filename, dtype, nrec, array) end subroutine read_binary_file_1d module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) - character(len=*), intent(in) :: filename - integer(ik), intent(in) :: dtype, dsize, nrec - real(rk), allocatable, intent(in out) :: array(:,:) - integer(ik) :: fileunit, i - character(len=message_len) :: io_message + character(*), intent(in) :: filename + integer, intent(in) :: dtype, dsize, nrec + real, allocatable, intent(in out) :: array(:,:) + integer :: fileunit, i + character(message_len) :: io_message integer :: io_status io_status = 0 open(newunit=fileunit, file=filename, access='direct', action='read', & @@ -40,4 +40,4 @@ module subroutine read_binary_file_2d(filename, dtype, dsize, nrec, array) close(fileunit) end subroutine read_binary_file_2d -end submodule mod_io_submodule +end submodule nf_io_submodule diff --git a/src/nf_layer.f90 b/src/nf_layer.f90 new file mode 100644 index 00000000..3f40185d --- /dev/null +++ b/src/nf_layer.f90 @@ -0,0 +1,101 @@ +module nf_layer + + !! This module provides the `layer` type that is part of the public + !! user-facing API. + + use nf_base_layer, only: base_layer + + implicit none + + private + public :: layer + + type :: layer + + !! Main layer type. Use custom constructor functions from + !! nf_layer_constructors.f90 to create `layer` instances. + + class(base_layer), allocatable :: p + character(:), allocatable :: name + character(:), allocatable :: activation + integer, allocatable :: layer_shape(:) + integer, allocatable :: input_layer_shape(:) + logical :: initialized = .false. + + contains + + procedure :: backward + procedure :: forward + procedure :: get_output + procedure :: init + procedure :: print_info + procedure :: update + + end type layer + + interface + + pure module subroutine backward(self, previous, gradient) + !! Apply a backward pass on the layer. + !! This changes the internal state of the layer. + !! This is normally called internally by the `network % backward` + !! method. + class(layer), intent(in out) :: self + !! Layer instance + class(layer), intent(in) :: previous + !! Previous layer instance + real, intent(in) :: gradient(:) + !! Array of gradient values from the next layer + end subroutine backward + + pure module subroutine forward(self, input) + !! Apply a forward pass on the layer. + !! This changes the internal state of the layer. + !! 
This is normally called internally by the `network % forward` + !! method. + class(layer), intent(in out) :: self + !! Layer instance + class(layer), intent(in) :: input + !! Input layer instance + end subroutine forward + + pure module subroutine get_output(self, output) + !! Returns the output values (activations) from this layer. + class(layer), intent(in) :: self + !! Layer instance + real, allocatable, intent(out) :: output(:) + !! Output values from this layer + end subroutine get_output + + impure elemental module subroutine init(self, input) + !! Initialize the layer, using information from the input layer, + !! i.e. the layer that precedes this one. + class(layer), intent(in out) :: self + !! Layer instance + class(layer), intent(in) :: input + !! Input layer instance + end subroutine init + + impure elemental module subroutine print_info(self) + !! Prints a summary information about this layer to the screen. + !! This method is called by `network % print_info` for all layers + !! on that network. + class(layer), intent(in) :: self + !! Layer instance + end subroutine print_info + + impure elemental module subroutine update(self, learning_rate) + !! Update the weights and biases on the layer using the stored + !! gradients (from backward passes), and flush those same stored + !! gradients to zero. + !! This changes the state of the layer. + !! Typically used only internally from the `network % update` method. + class(layer), intent(in out) :: self + !! Layer instance + real, intent(in) :: learning_rate + !! Learning rate to use; must be > 0. + end subroutine update + + end interface + +end module nf_layer diff --git a/src/nf_layer_constructors.f90 b/src/nf_layer_constructors.f90 new file mode 100644 index 00000000..b20982c2 --- /dev/null +++ b/src/nf_layer_constructors.f90 @@ -0,0 +1,116 @@ +module nf_layer_constructors + + !! This module provides the functions to instantiate specific layers. + + use nf_layer, only: layer + + implicit none + + private + public :: conv2d, dense, input + + interface input + + pure module function input1d(layer_size) result(res) + !! 1-d input layer constructor. + !! + !! This layer is for inputting 1-d data to the network. + !! Currently, this layer must be followed by a dense layer. + !! An input layer must be the first layer in the network. + !! + !! This is a specific function that is available + !! under a generic name `input`. + !! + !! Example: + !! + !! ``` + !! use nf, only :: input, layer + !! type(layer) :: input_layer + !! input_layer = input(768) + !! ``` + integer, intent(in) :: layer_size + !! Size of the input layer + type(layer) :: res + !! Resulting layer instance + end function input1d + + pure module function input3d(layer_shape) result(res) + !! 3-d input layer constructor. + !! + !! This layer is for inputting 3-d data to the network. + !! Currently, this layer must be followed by a conv2d layer. + !! An input layer must be the first layer in the network. + !! + !! This is a specific function that is available + !! under a generic name `input`. + !! + !! Example: + !! + !! ``` + !! use nf, only :: input, layer + !! type(layer) :: input_layer + !! input_layer = input([28, 28, 1]) + !! ``` + integer, intent(in) :: layer_shape(3) + !! Shape of the input layer + type(layer) :: res + !! Resulting layer instance + end function input3d + + end interface input + + interface + + pure module function dense(layer_size, activation) result(res) + !! Dense (fully-connected) layer constructor. + !! + !! 
This layer is a building block for dense, fully-connected networks, + !! or for an output layer of a convolutional network. + !! A dense layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only :: dense, layer + !! type(layer) :: dense_layer + !! dense_layer = dense(10) + !! dense_layer = dense(10, activation='relu') + !! ``` + integer, intent(in) :: layer_size + !! The number of neurons in a dense layer + character(*), intent(in), optional :: activation + !! Activation function (default 'sigmoid') + type(layer) :: res + !! Resulting layer instance + end function dense + + pure module function conv2d(window_size, filters, activation) result(res) + !! 2-d convolutional layer constructor. + !! + !! This layer is for building 2-d convolutional network. + !! Although the established convention is to call these layers 2-d, + !! the shape of the data is actuall 3-d: image width, image height, + !! and the number of channels. + !! A conv2d layer must not be the first layer in the network. + !! + !! Example: + !! + !! ``` + !! use nf, only :: conv2d, layer + !! type(layer) :: conv2d_layer + !! conv2d_layer = dense(window_size=3, filters=32) + !! conv2d_layer = dense(window_size=3, filters=32, activation='relu') + !! ``` + integer, intent(in) :: window_size + !! Width of the convolution window, commonly 3 or 5 + integer, intent(in) :: filters + !! Number of filters in the output of the layer + character(*), intent(in), optional :: activation + !! Activation function (default 'sigmoid') + type(layer) :: res + !! Resulting layer instance + end function conv2d + + end interface + +end module nf_layer_constructors diff --git a/src/nf_layer_constructors_submodule.f90 b/src/nf_layer_constructors_submodule.f90 new file mode 100644 index 00000000..e51713d2 --- /dev/null +++ b/src/nf_layer_constructors_submodule.f90 @@ -0,0 +1,75 @@ +submodule(nf_layer_constructors) nf_layer_constructors_submodule + + use nf_layer, only: layer + use nf_conv2d_layer, only: conv2d_layer + use nf_dense_layer, only: dense_layer + use nf_input1d_layer, only: input1d_layer + use nf_input3d_layer, only: input3d_layer + + implicit none + +contains + + pure module function input1d(layer_size) result(res) + integer, intent(in) :: layer_size + type(layer) :: res + res % name = 'input' + res % layer_shape = [layer_size] + res % input_layer_shape = [integer ::] + allocate(res % p, source=input1d_layer(layer_size)) + res % initialized = .true. + end function input1d + + + pure module function input3d(layer_shape) result(res) + integer, intent(in) :: layer_shape(3) + type(layer) :: res + res % name = 'input' + res % layer_shape = layer_shape + res % input_layer_shape = [integer ::] + allocate(res % p, source=input3d_layer(layer_shape)) + res % initialized = .true. 
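+    ! Input layers are fully initialized at construction time, so no
+    ! separate call to init is needed for them.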
+ end function input3d + + + pure module function dense(layer_size, activation) result(res) + integer, intent(in) :: layer_size + character(*), intent(in), optional :: activation + type(layer) :: res + + res % name = 'dense' + res % layer_shape = [layer_size] + + if (present(activation)) then + res % activation = activation + else + res % activation = 'sigmoid' + end if + + allocate(res % p, source=dense_layer(layer_size, res % activation)) + + end function dense + + + pure module function conv2d(window_size, filters, activation) result(res) + integer, intent(in) :: window_size + integer, intent(in) :: filters + character(*), intent(in), optional :: activation + type(layer) :: res + + res % name = 'conv2d' + + if (present(activation)) then + res % activation = activation + else + res % activation = 'sigmoid' + end if + + allocate( & + res % p, & + source=conv2d_layer(window_size, filters, res % activation) & + ) + + end function conv2d + +end submodule nf_layer_constructors_submodule diff --git a/src/nf_layer_submodule.f90 b/src/nf_layer_submodule.f90 new file mode 100644 index 00000000..ed03b188 --- /dev/null +++ b/src/nf_layer_submodule.f90 @@ -0,0 +1,116 @@ +submodule(nf_layer) nf_layer_submodule + + use nf_conv2d_layer, only: conv2d_layer + use nf_dense_layer, only: dense_layer + use nf_input1d_layer, only: input1d_layer + use nf_input3d_layer, only: input3d_layer + + implicit none + +contains + + pure module subroutine backward(self, previous, gradient) + class(layer), intent(in out) :: self + class(layer), intent(in) :: previous + real, intent(in) :: gradient(:) + + ! Backward pass currently implemented only for dense layers + select type(this_layer => self % p); type is(dense_layer) + + ! Previous layer is the input layer to this layer. + ! For a backward pass on a dense layer, we must accept either an input layer + ! or another dense layer as input. + select type(prev_layer => previous % p) + + type is(input1d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(dense_layer) + call this_layer % backward(prev_layer % output, gradient) + + end select + end select + + end subroutine backward + + + pure module subroutine forward(self, input) + class(layer), intent(in out) :: self + class(layer), intent(in) :: input + + select type(this_layer => self % p) + + ! Only dense layer is supported for now + type is(dense_layer) + + ! Input layers permitted: input1d, dense + select type(prev_layer => input % p) + type is(input1d_layer) + call this_layer % forward(prev_layer % output) + type is(dense_layer) + call this_layer % forward(prev_layer % output) + end select + + end select + + end subroutine forward + + + pure module subroutine get_output(self, output) + class(layer), intent(in) :: self + real, allocatable, intent(out) :: output(:) + + select type(this_layer => self % p) + + type is(input1d_layer) + allocate(output, source=this_layer % output) + type is(dense_layer) + allocate(output, source=this_layer % output) + + end select + + end subroutine get_output + + + impure elemental module subroutine init(self, input) + class(layer), intent(in out) :: self + class(layer), intent(in) :: input + + select type(this_layer => self % p); class default + call this_layer % init(input % layer_shape) + end select + + ! The shape of a conv2d layer is not known until we receive an input layer. 
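+    ! At this point init has allocated the output array, so we can record
+    ! its shape as the shape of this layer.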
+ select type(this_layer => self % p); type is(conv2d_layer) + self % layer_shape = shape(this_layer % output) + end select + + self % input_layer_shape = input % layer_shape + self % initialized = .true. + + end subroutine init + + + impure elemental module subroutine print_info(self) + class(layer), intent(in) :: self + print '("Layer: ", a)', self % name + print '(60("-"))' + if (.not. self % name == 'input') & + print '("Input shape: ", *(i0, 1x))', self % input_layer_shape + print '("Output shape: ", *(i0, 1x))', self % layer_shape + if (.not. self % name == 'input') & + print '("Activation: ", a)', self % activation + print * + end subroutine print_info + + + impure elemental module subroutine update(self, learning_rate) + class(layer), intent(in out) :: self + real, intent(in) :: learning_rate + + select type(this_layer => self % p); type is(dense_layer) + call this_layer % update(learning_rate) + end select + + end subroutine update + +end submodule nf_layer_submodule diff --git a/src/nf_loss.f90 b/src/nf_loss.f90 new file mode 100644 index 00000000..62687fd8 --- /dev/null +++ b/src/nf_loss.f90 @@ -0,0 +1,42 @@ +module nf_loss + + !! This module will eventually provide a collection of loss functions and + !! their derivatives. For the time being it provides only the quadratic + !! function. + + implicit none + + private + public :: quadratic, quadratic_derivative + + interface + + pure module function quadratic(true, predicted) result(res) + !! Quadratic loss function: + !! + !! L = (predicted - true)**2 / 2 + !! + real, intent(in) :: true(:) + !! True values, i.e. labels from training datasets + real, intent(in) :: predicted(:) + !! Values predicted by the network + real :: res(size(true)) + !! Resulting loss values + end function quadratic + + pure module function quadratic_derivative(true, predicted) result(res) + !! First derivative of the quadratic loss function: + !! + !! L' = predicted - true + !! + real, intent(in) :: true(:) + !! True values, i.e. labels from training datasets + real, intent(in) :: predicted(:) + !! Values predicted by the network + real :: res(size(true)) + !! Resulting loss values + end function quadratic_derivative + + end interface + +end module nf_loss diff --git a/src/nf_loss_submodule.f90 b/src/nf_loss_submodule.f90 new file mode 100644 index 00000000..f8ad8a5e --- /dev/null +++ b/src/nf_loss_submodule.f90 @@ -0,0 +1,21 @@ +submodule(nf_loss) nf_loss_submodule + + implicit none + +contains + + pure module function quadratic(true, predicted) result(res) + real, intent(in) :: true(:) + real, intent(in) :: predicted(:) + real :: res(size(true)) + res = (predicted - true)**2 / 2 + end function quadratic + + pure module function quadratic_derivative(true, predicted) result(res) + real, intent(in) :: true(:) + real, intent(in) :: predicted(:) + real :: res(size(true)) + res = predicted - true + end function quadratic_derivative + +end submodule nf_loss_submodule diff --git a/src/nf_network.f90 b/src/nf_network.f90 new file mode 100644 index 00000000..9923e946 --- /dev/null +++ b/src/nf_network.f90 @@ -0,0 +1,113 @@ +module nf_network + + !! This module provides the network type to create new models. 
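+  !! A network is built from an array of layers, the first of which must
+  !! be an input layer, for example:
+  !!
+  !! ```
+  !! use nf, only: dense, input, network
+  !! type(network) :: net
+  !! net = network([input(784), dense(10)])
+  !! ```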
+ + use nf_layer, only: layer + use nf_optimizers, only: sgd + + implicit none + + private + public :: network + + type :: network + type(layer), allocatable :: layers(:) + contains + procedure :: backward + procedure :: forward + procedure :: output + procedure :: print_info + procedure :: train + procedure :: update + end type network + + interface network + module function network_cons(layers) result(res) + !! Create a new `network` instance. + type(layer), intent(in) :: layers(:) + !! Input array of layer instances; + !! the first element must be an input layer. + type(network) :: res + !! An instance of the `network` type + end function network_cons + end interface network + + interface + + pure module subroutine backward(self, output) + !! Apply one backward pass through the network. + !! This changes the state of layers on the network. + !! Typically used only internally from the `train` method, + !! but can be invoked by the user when creating custom optimizers. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: output(:) + !! Output data + end subroutine backward + + pure module subroutine forward(self, input) + !! Apply a forward pass through the network. + !! This changes the state of layers on the network. + !! Typically used only internally from the `train` method, + !! but can be invoked by the user when creating custom optimizers. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: input(:) + !! Input data + end subroutine forward + + module function output(self, input) result(res) + !! Return the output of the network given the input array. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: input(:) + !! Input data + real, allocatable :: res(:) + !! Output of the network + end function output + + module subroutine print_info(self) + !! Prints a brief summary of the network and its layers to the screen. + class(network), intent(in) :: self + !! Network instance + end subroutine print_info + + module subroutine train(self, input_data, output_data, batch_size, & + epochs, optimizer) + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: input_data(:,:) + !! Input data to train on; + !! first dimension contains a single sample + !! and its size must match the size of the input layer. + real, intent(in) :: output_data(:,:) + !! Output data to train on; + !! first dimension contains a single sample + !! and its size must match the size of the input layer. + integer, intent(in) :: batch_size + !! Batch size to use. + !! Set to 1 for a pure stochastic gradient descent. + !! Set to `size(input_data, dim=2)` for a batch gradient descent. + integer, intent(in) :: epochs + !! Number of epochs to run + type(sgd), intent(in) :: optimizer + !! Optimizer instance; currently this is an `sgd` optimizer type + !! and it will be made to be a more general optimizer type. + end subroutine train + + module subroutine update(self, learning_rate) + !! Update the weights and biases on all layers using the stored + !! gradients (from backward passes) on those layers, and flush those + !! same stored gradients to zero. + !! This changes the state of layers on the network. + !! Typically used only internally from the `train` method, + !! but can be invoked by the user when creating custom optimizers. + class(network), intent(in out) :: self + !! Network instance + real, intent(in) :: learning_rate + !! Learning rate to use; must be > 0. 
+ end subroutine update + + end interface + +end module nf_network diff --git a/src/nf_network_submodule.f90 b/src/nf_network_submodule.f90 new file mode 100644 index 00000000..b8584eac --- /dev/null +++ b/src/nf_network_submodule.f90 @@ -0,0 +1,176 @@ +submodule(nf_network) nf_network_submodule + + use nf_dense_layer, only: dense_layer + use nf_input1d_layer, only: input1d_layer + use nf_layer, only: layer + use nf_loss, only: quadratic_derivative + use nf_optimizers, only: sgd + use nf_parallel, only: tile_indices + + implicit none + +contains + + module function network_cons(layers) result(res) + type(layer), intent(in) :: layers(:) + type(network) :: res + integer :: n + + ! Error handling + + ! There must be at least two layers + if (size(layers) < 2) & + error stop 'Error: A network must have at least 2 layers.' + + ! The first layer must be an input layer + if (.not. layers(1) % name == 'input') & + error stop 'Error: First layer in the network must be an input layer.' + + !TODO Ensure that the layers are in allowed sequence: + !TODO input1d -> dense + !TODO dense -> dense + !TODO input3d -> conv2d + !TODO conv2d -> conv2d + !TODO conv2d -> maxpool2d + !TODO maxpool2d -> conv2d + !TODO conv2d -> flatten + + res % layers = layers + + ! Loop over each layer in order and call the init methods. + ! This will allocate the data internal to each layer (e.g. weights, biases) + ! according to the size of the previous layer. + do n = 2, size(layers) + call res % layers(n) % init(res % layers(n - 1)) + end do + + end function network_cons + + + pure module subroutine backward(self, output) + class(network), intent(in out) :: self + real, intent(in) :: output(:) + real, allocatable :: gradient(:) + integer :: n, num_layers + + num_layers = size(self % layers) + + ! Iterate backward over layers, from the output layer + ! to the first non-input layer + do n = num_layers, 2, -1 + + if (n == num_layers) then + ! Output layer; apply the loss function + select type(this_layer => self % layers(n) % p) + type is(dense_layer) + gradient = quadratic_derivative(output, this_layer % output) + end select + else + ! Hidden layer; take the gradient from the next layer + select type(next_layer => self % layers(n + 1) % p) + type is(dense_layer) + gradient = next_layer % gradient + end select + end if + + call self % layers(n) % backward(self % layers(n - 1), gradient) + + end do + + end subroutine backward + + + pure module subroutine forward(self, input) + class(network), intent(in out) :: self + real, intent(in) :: input(:) + integer :: n + + ! 
Set the input array into the input layer + select type(input_layer => self % layers(1) % p); type is(input1d_layer) + call input_layer % set(input) + end select + + do n = 2, size(self % layers) + call self % layers(n) % forward(self % layers(n - 1)) + end do + + end subroutine forward + + + module function output(self, input) result(res) + class(network), intent(in out) :: self + real, intent(in) :: input(:) + real, allocatable :: res(:) + integer :: num_layers + + num_layers = size(self % layers) + + call self % forward(input) + + select type(output_layer => self % layers(num_layers) % p); type is(dense_layer) + res = output_layer % output + end select + + end function output + + + module subroutine print_info(self) + class(network), intent(in) :: self + call self % layers % print_info() + end subroutine print_info + + + module subroutine train(self, input_data, output_data, batch_size, & + epochs, optimizer) + class(network), intent(in out) :: self + real, intent(in) :: input_data(:,:) + real, intent(in) :: output_data(:,:) + integer, intent(in) :: batch_size + integer, intent(in) :: epochs + type(sgd), intent(in) :: optimizer + + real :: pos + integer :: dataset_size + integer :: batch_start, batch_end + integer :: i, j, n + integer :: istart, iend, indices(2) + + dataset_size = size(output_data, dim=2) + + epoch_loop: do n = 1, epochs + batch_loop: do i = 1, dataset_size / batch_size + + ! Pull a random mini-batch from the dataset + call random_number(pos) + batch_start = int(pos * (dataset_size - batch_size + 1)) + 1 + batch_end = batch_start + batch_size - 1 + + ! FIXME shuffle in a way that doesn't require co_broadcast + call co_broadcast(batch_start, 1) + call co_broadcast(batch_end, 1) + + ! Distribute the batch in nearly equal pieces to all images + indices = tile_indices(batch_size) + istart = indices(1) + batch_start - 1 + iend = indices(2) + batch_start - 1 + + do concurrent(j = istart:iend) + call self % forward(input_data(:,j)) + call self % backward(output_data(:,j)) + end do + + call self % update(optimizer % learning_rate / batch_size) + + end do batch_loop + end do epoch_loop + + end subroutine train + + + module subroutine update(self, learning_rate) + class(network), intent(in out) :: self + real, intent(in) :: learning_rate + call self % layers % update(learning_rate) + end subroutine update + +end submodule nf_network_submodule diff --git a/src/nf_optimizers.f90 b/src/nf_optimizers.f90 new file mode 100644 index 00000000..2ba89904 --- /dev/null +++ b/src/nf_optimizers.f90 @@ -0,0 +1,17 @@ +module nf_optimizers + + !! This module provides optimizer types to pass to the network constructor. + + implicit none + + private + public :: sgd + + type :: sgd + !! Stochastic Gradient Descent optimizer + real :: learning_rate + real :: momentum = 0 !TODO + logical :: nesterov = .false. !TODO + end type sgd + +end module nf_optimizers diff --git a/src/mod_parallel.f90 b/src/nf_parallel.f90 similarity index 58% rename from src/mod_parallel.f90 rename to src/nf_parallel.f90 index 2c558cea..ac847cb4 100644 --- a/src/mod_parallel.f90 +++ b/src/nf_parallel.f90 @@ -1,6 +1,5 @@ -module mod_parallel +module nf_parallel - use mod_kinds, only: ik, rk implicit none private @@ -8,14 +7,14 @@ module mod_parallel interface - pure module function tile_indices(dims) + pure module function tile_indices(dims) result(res) !! Given input global array size, return start and end index !! of a parallel 1-d tile that correspond to this image. 
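+      !! For example, with dims = 10 on 4 images, images 1 and 2 each get
+      !! a tile of 2 elements, and images 3 and 4 each get 3.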
implicit none - integer(ik), intent(in) :: dims - integer(ik) :: tile_indices(2) + integer, intent(in) :: dims + integer :: res(2) end function tile_indices end interface -end module mod_parallel +end module nf_parallel diff --git a/src/mod_parallel_submodule.f90 b/src/nf_parallel_submodule.f90 similarity index 55% rename from src/mod_parallel_submodule.f90 rename to src/nf_parallel_submodule.f90 index 3b6e1d44..6af1b57b 100644 --- a/src/mod_parallel_submodule.f90 +++ b/src/nf_parallel_submodule.f90 @@ -1,22 +1,19 @@ -submodule(mod_parallel) mod_parallel_submodule - - use mod_kinds, only: ik, rk +submodule(nf_parallel) nf_parallel_submodule implicit none - contains pure module function tile_indices(dims) result(res) - integer(ik), intent(in) :: dims - integer(ik) :: res(2) - integer(ik) :: offset, tile_size + integer, intent(in) :: dims + integer :: res(2) + integer :: offset, tile_size tile_size = dims / num_images() - !! start and end indices assuming equal tile sizes + ! start and end indices assuming equal tile sizes res(1) = (this_image() - 1) * tile_size + 1 res(2) = res(1) + tile_size - 1 - !! if we have any remainder, distribute it to the tiles at the end + ! if we have any remainder, distribute it to the tiles at the end offset = num_images() - mod(dims, num_images()) if (this_image() > offset) then res(1) = res(1) + this_image() - offset - 1 @@ -25,4 +22,4 @@ pure module function tile_indices(dims) result(res) end function tile_indices -end submodule mod_parallel_submodule +end submodule nf_parallel_submodule diff --git a/src/mod_random.f90 b/src/nf_random.f90 similarity index 59% rename from src/mod_random.f90 rename to src/nf_random.f90 index 6470d2c9..7c6544b6 100644 --- a/src/mod_random.f90 +++ b/src/nf_random.f90 @@ -1,10 +1,8 @@ -module mod_random +module nf_random !! Provides a random number generator with !! normal distribution, centered on zero. - use mod_kinds, only: ik, rk - implicit none private @@ -13,19 +11,21 @@ module mod_random interface randn module function randn1d(n) result(r) - !! Generates n random numbers with a normal distribution. + !! Generates n random numbers with a normal distribution, + !! using the Box-Muller method. implicit none - integer(ik), intent(in) :: n - real(rk) :: r(n) + integer, intent(in) :: n + real :: r(n) end function randn1d module function randn2d(m, n) result(r) - !! Generates m x n random numbers with a normal distribution. + !! Generates m x n random numbers with a normal distribution, + !! using the Box-Muller method. 
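+      !! Each value is computed as sqrt(-2 * log(u)) * cos(2 * pi * v),
+      !! where u and v are uniformly distributed random numbers.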
implicit none - integer(ik), intent(in) :: m, n - real(rk) :: r(m, n) + integer, intent(in) :: m, n + real :: r(m,n) end function randn2d end interface randn -end module mod_random +end module nf_random diff --git a/src/nf_random_submodule.f90 b/src/nf_random_submodule.f90 new file mode 100644 index 00000000..7ee8de6a --- /dev/null +++ b/src/nf_random_submodule.f90 @@ -0,0 +1,26 @@ +submodule(nf_random) nf_random_submodule + implicit none + + real, parameter :: pi = 4 * atan(1.d0) + +contains + + module function randn1d(n) result(x) + integer, intent(in) :: n + real :: x(n) + real :: u(n), v(n) + call random_number(u) + call random_number(v) + x = sqrt(-2 * log(u)) * cos(2 * pi * v) + end function randn1d + + module function randn2d(m, n) result(x) + integer, intent(in) :: m, n + real :: x(m,n) + real :: u(m,n), v(m,n) + call random_number(u) + call random_number(v) + x = sqrt(-2 * log(u)) * cos(2 * pi * v) + end function randn2d + +end submodule nf_random_submodule diff --git a/test/test_dense_layer.f90 b/test/test_dense_layer.f90 new file mode 100644 index 00000000..5bc68aef --- /dev/null +++ b/test/test_dense_layer.f90 @@ -0,0 +1,57 @@ +program test_dense_layer + use iso_fortran_env, only: stderr => error_unit + use nf, only: dense, layer + implicit none + type(layer) :: layer1, layer2 + logical :: ok = .true. + + layer1 = dense(10) + + if (.not. layer1 % name == 'dense') then + ok = .false. + write(stderr, '(a)') 'dense layer has its name set correctly.. failed' + end if + + if (.not. all(layer1 % layer_shape == [10])) then + ok = .false. + write(stderr, '(a)') 'dense layer is created with requested size.. failed' + end if + + if (layer1 % initialized) then + ok = .false. + write(stderr, '(a)') 'dense layer should not be marked as initialized yet.. failed' + end if + + if (.not. layer1 % activation == 'sigmoid') then + ok = .false. + write(stderr, '(a)') 'dense layer is defaults to sigmoid activation.. failed' + end if + + layer1 = dense(10, activation='relu') + + if (.not. layer1 % activation == 'relu') then + ok = .false. + write(stderr, '(a)') 'dense layer is created with the specified activation.. failed' + end if + + layer2 = dense(20) + call layer2 % init(layer1) + + if (.not. layer2 % initialized) then + ok = .false. + write(stderr, '(a)') 'dense layer should now be marked as initialized.. failed' + end if + + if (.not. all(layer2 % input_layer_shape == [10])) then + ok = .false. + write(stderr, '(a)') 'dense layer should have a correct input layer shape.. failed' + end if + + if (ok) then + print '(a)', 'test_dense_layer: All tests passed.' + else + write(stderr, '(a)') 'test_dense_layer: One or more tests failed.' + stop 1 + end if + +end program test_dense_layer diff --git a/test/test_dense_network.f90 b/test/test_dense_network.f90 new file mode 100644 index 00000000..9df7e71b --- /dev/null +++ b/test/test_dense_network.f90 @@ -0,0 +1,69 @@ +program test_dense_network + use iso_fortran_env, only: stderr => error_unit + use nf, only: dense, input, network + implicit none + type(network) :: net + logical :: ok = .true. + + ! Minimal 2-layer network + net = network([ & + input(1), & + dense(1) & + ]) + + if (.not. size(net % layers) == 2) then + write(stderr, '(a)') 'dense network should have 2 layers.. failed' + ok = .false. + end if + + if (.not. all(net % output([0.]) == 0.5)) then + write(stderr, '(a)') & + 'dense network should output exactly 0.5 for input 0.. failed' + ok = .false. 
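+    ! With a zero input the first dense layer sees only its biases, so an
+    ! output of exactly 0.5 (sigmoid of 0) relies on the biases being
+    ! initialized to zero.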
+ end if + + training: block + real :: x(1), y(1) + real :: tolerance = 1e-3 + integer :: n + integer, parameter :: num_iterations = 1000 + + x = [0.123] + y = [0.765] + + do n = 1, num_iterations + call net % forward(x) + call net % backward(y) + call net % update(1.) + if (all(abs(net % output(x) - y) < tolerance)) exit + end do + + if (.not. n <= num_iterations) then + write(stderr, '(a)') & + 'dense network should converge in simple training.. failed' + ok = .false. + end if + + end block training + + ! A bit larger multi-layer network + net = network([ & + input(784), & + dense(30), & + dense(20), & + dense(10) & + ]) + + if (.not. size(net % layers) == 4) then + write(stderr, '(a)') 'dense network should have 4 layers.. failed' + ok = .false. + end if + + if (ok) then + print '(a)', 'test_dense_network: All tests passed.' + else + write(stderr, '(a)') 'test_dense_network: One or more tests failed.' + stop 1 + end if + +end program test_dense_network diff --git a/test/test_input1d_layer.f90 b/test/test_input1d_layer.f90 new file mode 100644 index 00000000..f6a50369 --- /dev/null +++ b/test/test_input1d_layer.f90 @@ -0,0 +1,57 @@ +program test_input1d_layer + use iso_fortran_env, only: stderr => error_unit + use nf, only: input, layer + use nf_input1d_layer, only: input1d_layer + implicit none + type(layer) :: test_layer + real, allocatable :: output(:) + logical :: ok = .true. + + test_layer = input(3) + + if (.not. test_layer % name == 'input') then + ok = .false. + write(stderr, '(a)') 'input1d layer has its name set correctly.. failed' + end if + + if (.not. test_layer % initialized) then + ok = .false. + write(stderr, '(a)') 'input1d layer should be marked as initialized.. failed' + end if + + if (.not. all(test_layer % layer_shape == [3])) then + ok = .false. + write(stderr, '(a)') 'input1d layer is created with requested size.. failed' + end if + + if (.not. size(test_layer % input_layer_shape) == 0) then + ok = .false. + write(stderr, '(a)') 'input1d layer has no input layer shape.. failed' + end if + + call test_layer % get_output(output) + + if (.not. all(output == 0)) then + ok = .false. + write(stderr, '(a)') 'input1d layer values are all initialized to 0.. failed' + end if + + select type(input_layer => test_layer % p); type is(input1d_layer) + call input_layer % set([1., 2., 3.]) + end select + + call test_layer % get_output(output) + + if (.not. all(output == [1., 2., 3.])) then + ok = .false. + write(stderr, '(a)') 'input1d layer can have its values set.. failed' + end if + + if (ok) then + print '(a)', 'test_input1d_layer: All tests passed.' + else + write(stderr, '(a)') 'test_input1d_layer: One or more tests failed.' + stop 1 + end if + +end program test_input1d_layer diff --git a/test/test_mnist.f90 b/test/test_mnist.f90 deleted file mode 100644 index 183de0fb..00000000 --- a/test/test_mnist.f90 +++ /dev/null @@ -1,24 +0,0 @@ -program test_mnist - - use mod_mnist, only: load_mnist - use mod_kinds, only: ik, rk - - implicit none - - real(rk), allocatable :: tr_images(:,:), tr_labels(:) - real(rk), allocatable :: te_images(:,:), te_labels(:) - real(rk), allocatable :: va_images(:,:), va_labels(:) - - print *, 'Reading MNIST data..' 
- call load_mnist(tr_images, tr_labels, te_images, te_labels, va_images, va_labels) - print *, 'Training data:' - print *, shape(tr_images), minval(tr_images), maxval(tr_images), sum(tr_images) / size(tr_images) - print *, shape(tr_labels), sum(tr_labels) / size(tr_labels) - print *, 'Testing data:' - print *, shape(te_images), minval(te_images), maxval(te_images), sum(te_images) / size(te_images) - print *, shape(te_labels), sum(te_labels) / size(te_labels) - print *, 'Validation data:' - print *, shape(va_images), minval(va_images), maxval(va_images), sum(va_images) / size(va_images) - print *, shape(va_labels), sum(va_labels) / size(va_labels) - -end program test_mnist diff --git a/test/test_network_save.f90 b/test/test_network_save.f90 deleted file mode 100644 index 00aea0d2..00000000 --- a/test/test_network_save.f90 +++ /dev/null @@ -1,32 +0,0 @@ -program test_network_save - use mod_network, only: network_type - implicit none - type(network_type) :: net1, net2 - integer :: n - print *, 'Initializing 2 networks with random weights and biases' - net1 = network_type([768, 30, 10]) - net2 = network_type([768, 30, 10]) - - print *, 'Save network 1 into file' - call net1 % save('test_network.dat') - call net2 % load('test_network.dat') - print *, 'Load network 2 from file' - do n = 1, size(net1 % layers) - print *, 'Layer ', n, ', weights equal: ',& - all(net1 % layers(n) % w == net2 % layers(n) % w),& - ', biases equal:', all(net1 % layers(n) % b == net2 % layers(n) % b) - end do - print *, '' - - print *, 'Setting different activation functions for each layer of network 1' - call net1 % set_activation([character(len=10) :: 'sigmoid', 'tanh', 'gaussian']) - print *, 'Save network 1 into file' - call net1 % save('test_network.dat') - call net2 % load('test_network.dat') - print *, 'Load network 2 from file' - do n = 1, size(net1 % layers) - print *, 'Layer ', n, ', activation functions equal:',& - associated(net1 % layers(n) % activation, net2 % layers(n) % activation),& - '(network 1: ', net1 % layers(n) % activation_str, ', network 2: ', net2 % layers(n) % activation_str,')' - end do -end program test_network_save diff --git a/test/test_network_sync.f90 b/test/test_network_sync.f90 deleted file mode 100644 index 9b905b95..00000000 --- a/test/test_network_sync.f90 +++ /dev/null @@ -1,9 +0,0 @@ -program test_network_sync - use mod_network, only: network_type - implicit none - type(network_type) :: net - - net = network_type([5, 3, 2]) - print *, this_image(), net % layers(1) % w - -end program test_network_sync diff --git a/test/test_set_activation_function.f90 b/test/test_set_activation_function.f90 deleted file mode 100644 index 479055f0..00000000 --- a/test/test_set_activation_function.f90 +++ /dev/null @@ -1,63 +0,0 @@ -program test_set_activation_function - - ! This program will test whether per-network and per-layer - ! setting of activation functions works as expected. - ! First we create an array of random variables. - ! Then we set different activation functions to different - ! layers in the network. - ! Finally, we test whether each function produces same - ! values as the activation functions set in the layers. - - use mod_activation - use mod_network, only: network_type - use mod_random, only: randn - - implicit none - type(network_type) :: net - real, allocatable :: x(:) - integer :: n - logical, allocatable :: tests(:) - - tests = [logical ::] - - x = randn(100) - - ! the network will be created with - ! 
sigmoid activation functions for all layers - net = network_type([1, 1, 1, 1, 1]) - - do n = 1, size(net % layers) - tests = [tests, all(sigmoid(x) == net % layers(n) % activation(x))] - tests = [tests, all(sigmoid_prime(x) == net % layers(n) % activation_prime(x))] - end do - - ! now set the various functions for other layers - call net % layers(2) % set_activation('gaussian') - call net % layers(3) % set_activation('step') - call net % layers(4) % set_activation('tanh') - call net % layers(5) % set_activation('relu') - - tests = [tests, all(sigmoid(x) == net % layers(1) % activation(x))] - tests = [tests, all(sigmoid_prime(x) == net % layers(1) % activation_prime(x))] - - tests = [tests, all(gaussian(x) == net % layers(2) % activation(x))] - tests = [tests, all(gaussian_prime(x) == net % layers(2) % activation_prime(x))] - - tests = [tests, all(step(x) == net % layers(3) % activation(x))] - tests = [tests, all(step_prime(x) == net % layers(3) % activation_prime(x))] - - tests = [tests, all(tanhf(x) == net % layers(4) % activation(x))] - tests = [tests, all(tanh_prime(x) == net % layers(4) % activation_prime(x))] - - tests = [tests, all(relu(x) == net % layers(5) % activation(x))] - tests = [tests, all(relu_prime(x) == net % layers(5) % activation_prime(x))] - - print *, tests - - if (all(tests)) then - print *, 'All tests passed.' - else - error stop 'some tests failed.' - end if - -end program test_set_activation_function
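For reference, a minimal usage sketch of the refactored API that this changeset introduces is shown below. It uses only names that appear in the diff (`nf`, `network`, `dense`, `input`, `train`, `output`, and `sgd` from `nf_optimizers`), and it assumes `train` is bound to the `network` type in the same way as `forward`, `output`, and `update`. The dataset and hyperparameters are illustrative assumptions, not part of the changeset.

```
program example_sine_sketch
  ! Hypothetical usage sketch of the refactored API; module and procedure
  ! names follow this diff, but the program itself is not part of the changeset.
  use nf, only: dense, input, network
  use nf_optimizers, only: sgd
  implicit none

  type(network) :: net
  real, parameter :: pi = 4 * atan(1.)
  real :: x(1,100), y(1,100)
  integer :: i

  ! Toy dataset: y = (sin(x) + 1) / 2, scaled into (0,1) to suit the
  ! default sigmoid activation of the dense layers.
  do i = 1, 100
    x(1,i) = (i - 1) * 2 * pi / 100
    y(1,i) = (sin(x(1,i)) + 1) / 2
  end do

  ! Input layer of size 1, one hidden dense layer, dense output layer of size 1
  net = network([ &
    input(1), &
    dense(10), &
    dense(1) &
  ])

  ! Mini-batch SGD training with the optimizer type from nf_optimizers;
  ! batch_size, epochs, and learning_rate are arbitrary illustrative values.
  call net % train( &
    x, y, &
    batch_size=10, &
    epochs=100, &
    optimizer=sgd(learning_rate=1.) &
  )

  print *, 'Prediction at pi/2:', net % output([pi / 2])

end program example_sine_sketch
```

Because `train` tiles each mini-batch across coarray images, a sketch like this would be run either on multiple images or with a single-image (serial) coarray build.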