diff --git a/docs/make.jl b/docs/make.jl
index 0847656b9d..ee836b216b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -9,51 +9,53 @@ makedocs(
     sitename = "Flux",
     # strict = [:cross_references,],
     pages = [
-        "Getting Started" => [
-            "Welcome" => "index.md",
+        "Welcome" => "index.md",
+        "Guide" => [
+        # You could read this end-to-end, or skip to what you need.
+        # Aim is to cover each new concept exactly once (but not list all variants).
+        # Hard to invent further divisions which aren't more confusing than helpful?
             "Quick Start" => "models/quickstart.md",
             "Fitting a Line" => "models/overview.md",
             "Gradients and Layers" => "models/basics.md",
-        ],
-        "Building Models" => [
-            "Built-in Layers 📚" => "models/layers.md",
+            "Training" => "training/training.md",
+            "Regularisation" => "models/regularisation.md",  # consolidated in #2114
             "Recurrence" => "models/recurrence.md",
-            "Activation Functions 📚" => "models/activation.md",
-            "NNlib.jl 📚 (`softmax`, `conv`, ...)" => "models/nnlib.md",
-         ],
-         "Handling Data" => [
-             "MLUtils.jl 📚 (`DataLoader`, ...)" => "data/mlutils.md",
-             "OneHotArrays.jl 📚 (`onehot`, ...)" => "data/onehot.md",
-         ],
-         "Training Models" => [
-             "Training" => "training/training.md",
-             "Regularisation" => "models/regularisation.md",
-             "Loss Functions 📚" => "models/losses.md",
-             "Optimisation Rules 📚" => "training/optimisers.md",  # TODO move optimiser intro up to Training
-             "Callback Helpers 📚" => "training/callbacks.md",
-             "Zygote.jl 📚 (`gradient`, ...)" => "training/zygote.md",
-         ],
-         "Model Tools" => [
-             "GPU Support" => "gpu.md",
-             "Saving & Loading" => "saving.md",
-             "Shape Inference 📚" => "outputsize.md",
-             "Weight Initialisation 📚" => "utilities.md",
-             "Flat vs. Nested 📚" => "destructure.md",
-             "Functors.jl 📚 (`fmap`, ...)" => "models/functors.md",
+            "GPU Support" => "gpu.md",
+            "Saving & Loading" => "saving.md",
+            "Performance Tips" => "performance.md",
+        ],
+        "Ecosystem" => "ecosystem.md",
+        "Reference" => [
+        # This essentially collects docstrings, with a bit of introduction.
+            "Built-in Layers" => "models/layers.md",
+            "Activation Functions" => "models/activation.md",
+            "Weight Initialisation" => "utilities.md",
+            "Loss Functions" => "models/losses.md",
+            "Optimisation Rules" => "training/optimisers.md",  # TODO move optimiser intro up to Training
+            "Shape Inference" => "outputsize.md",
+            "Flat vs. Nested" => "destructure.md",
+            "Callback Helpers" => "training/callbacks.md",
+            "Gradients -- Zygote.jl" => "training/zygote.md",
+            "Batching Data -- MLUtils.jl" => "data/mlutils.md",
+            "OneHotArrays.jl" => "data/onehot.md",
+            "Low-level Operations -- NNlib.jl" => "models/nnlib.md",
+            "Nested Structures -- Functors.jl" => "models/functors.md",
          ],
-         "Tutorials" => [
-             # Roughly in order of increasing complexity? Not chronological.
+        "Tutorials" => [
+        # These walk you through various tasks. It's fine if they overlap quite a lot.
+        # All the website tutorials can move here, perhaps much of the model zoo too?
+        # Or perhaps those should just be trashed, model zoo versions are newer & more useful.
             "Linear Regression" => "tutorials/linear_regression.md",
+            #=
             "Julia & Flux: 60 Minute Blitz" => "tutorials/2020-09-15-deep-learning-flux.md",
             "Multi-layer Perceptron" => "tutorials/2021-01-26-mlp.md",
             "Simple ConvNet" => "tutorials/2021-02-07-convnet.md",
             "Generative Adversarial Net" => "tutorials/2021-10-14-vanilla-gan.md",
             "Deep Convolutional GAN" => "tutorials/2021-10-08-dcgan-mnist.md",
+            =#
             # Not really sure where this belongs... some in Fluxperimental, aim to delete?
             "Custom Layers" => "models/advanced.md",  # TODO move freezing to Training
-         ],
-         "Performance Tips" => "performance.md",
-         "Flux's Ecosystem" => "ecosystem.md",
+        ],
     ],
     format = Documenter.HTML(
         sidebar_sitename = false,
diff --git a/docs/src/ecosystem.md b/docs/src/ecosystem.md
index 9bcefc8d28..785d36ea59 100644
--- a/docs/src/ecosystem.md
+++ b/docs/src/ecosystem.md
@@ -5,11 +5,11 @@ globally providing a rich and consistent user experience.
 
 This is a non-exhaustive list of Julia packages, nicely complementing `Flux` in typical
 machine learning and deep learning workflows. To add your project please send a [PR](https://github.com/FluxML/Flux.jl/pulls).
-See also academic work citing Flux or Zygote.
+See also academic work [citing Flux](https://scholar.google.com/scholar?cites=9731162218836700005&hl=en) or [citing Zygote](https://scholar.google.com/scholar?cites=11943854577624257878&hl=en).
 
 ## Flux models
 
-Packages that are actual `Flux` models but are not available directly through the `Flux` package.
+- Flux's [model-zoo](https://github.com/FluxML/model-zoo) contains examples from many domains.
 
 ### Computer vision
 
@@ -38,6 +38,8 @@ Packages that are actual `Flux` models but are not available directly through th
 
 - [FluxArchitectures.jl](https://github.com/sdobber/FluxArchitectures.jl) is a collection of advanced network architectures for time series forecasting.
 
+---
+
 ## Tools closely associated with Flux
 
 Utility tools you're unlikely to have met if you never used Flux!
@@ -64,9 +66,10 @@ Tools to put data into the right order for creating a model.
 
 ### Parameters
 
-- [Parameters.jl](https://github.com/mauro3/Parameters.jl) types with default field values, keyword constructors and (un-)pack macros.
 - [ParameterSchedulers.jl](https://github.com/darsnack/ParameterSchedulers.jl) standard scheduling policies for machine learning.
 
+---
+
 ## Differentiable programming
 
 Packages based on differentiable programming but not necessarily related to Machine Learning. 
@@ -90,6 +93,7 @@ Packages based on differentiable programming but not necessarily related to Mach
 
 - [OnlineStats.jl](https://github.com/joshday/OnlineStats.jl) provides single-pass algorithms for statistics.
 
+---
 
 ## Useful miscellaneous packages
 
@@ -104,8 +108,29 @@ Some useful and random packages!
 - [ProgressMeter.jl](https://github.com/timholy/ProgressMeter.jl) progress meters for long-running computations.
 - [TensorBoardLogger.jl](https://github.com/PhilipVinc/TensorBoardLogger.jl) easy peasy logging to [tensorboard](https://www.tensorflow.org/tensorboard) in Julia
 - [ArgParse.jl](https://github.com/carlobaldassi/ArgParse.jl) is a package for parsing command-line arguments to Julia programs.
+- [Parameters.jl](https://github.com/mauro3/Parameters.jl) types with default field values, keyword constructors and (un-)pack macros.
 - [BSON.jl](https://github.com/JuliaIO/BSON.jl) is a package for working with the Binary JSON serialisation format.
 - [DataFrames.jl](https://github.com/JuliaData/DataFrames.jl) in-memory tabular data in Julia.
 - [DrWatson.jl](https://github.com/JuliaDynamics/DrWatson.jl) is a scientific project assistant software.
 
 This tight integration among Julia packages is shown in some of the examples in the [model-zoo](https://github.com/FluxML/model-zoo) repository.
+
+---
+
+## Alternatives to Flux
+
+Julia has several other libraries for making neural networks. 
+
+* [SimpleChains.jl](https://github.com/PumasAI/SimpleChains.jl) is focused on making small, simple, CPU-based, neural networks fast. Uses [LoopVectorization.jl](https://github.com/JuliaSIMD/LoopVectorization.jl). (Was `FastChain` in DiffEqFlux.jl) 
+
+* [Knet.jl](https://github.com/denizyuret/Knet.jl) is a neural network library built around [AutoGrad.jl](https://github.com/denizyuret/AutoGrad.jl).
+
+* [Lux.jl](https://github.com/avik-pal/Lux.jl) (earlier ExplicitFluxLayers.jl) shares much of the design, use-case, and NNlib.jl / Optimisers.jl back-end of Flux. But instead of encapsulating all parameters within the model structure, it separates this into 3 components: a model, a tree of parameters, and a tree of model states.
+
+!!! compat "Explicit or explicit?"
+    Flux's [training docs](@ref man-training) talk about changes from Zygote's implicit to
+    explicit gradients, dictionary-like to tree-like structures.
+    (See also [Zygote's description](https://fluxml.ai/Zygote.jl/dev/#Explicit-and-Implicit-Parameters-1) of these.)
+    Lux also uses Zygote, but uses the word "explicit" to mean something unrelated,
+    namely storing the tree of parameters (and of state) separately from the model.
+
diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index e8e98774b6..46fed4e1bf 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -182,3 +182,4 @@ $ export CUDA_VISIBLE_DEVICES='0,1'
 
 
 More information for conditional use of GPUs in CUDA.jl can be found in its [documentation](https://cuda.juliagpu.org/stable/installation/conditional/#Conditional-use), and information about the specific use of the variable is described in the [Nvidia CUDA blog post](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/).
+
diff --git a/docs/src/index.md b/docs/src/index.md
index 98fffc4a5c..833c85e5e8 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -2,29 +2,25 @@
 
 Flux is a library for machine learning. It comes "batteries-included" with many useful tools built in, but also lets you use the full power of the Julia language where you need it. We follow a few key principles:
 
-* **Doing the obvious thing**. Flux has relatively few explicit APIs for features like regularisation or embeddings. Instead, writing down the mathematical form will work – and be fast.
-* **Extensible by default**. Flux is written to be highly extensible and flexible while being performant. Extending Flux is as simple as using your own code as part of the model you want - it is all [high-level Julia code](https://github.com/FluxML/Flux.jl/blob/ec16a2c77dbf6ab8b92b0eecd11661be7a62feef/src/layers/recurrent.jl#L131). When in doubt, it’s well worth looking at [the source](https://github.com/FluxML/Flux.jl/tree/master/src). If you need something different, you can easily roll your own.
-* **Play nicely with others**. Flux works well with Julia libraries from [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/SciML/DifferentialEquations.jl), so you can easily build complex data processing pipelines that integrate Flux models.
+* **Doing the obvious thing**. Flux has relatively few explicit APIs. Instead, writing down the mathematical form will work – and be fast.
+* **Extensible by default**. Flux is written to be highly flexible while being performant. Extending Flux is as simple as using your own code as part of the model you want - it is all [high-level Julia code](https://github.com/FluxML/Flux.jl/tree/master/src).
+* **Play nicely with others**. Flux works well with unrelated Julia libraries from [images](https://github.com/JuliaImages/Images.jl) to [differential equation solvers](https://github.com/SciML/DifferentialEquations.jl), rather than duplicating them.
 
-## Installation
+### Installation
 
-Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt.
+Download [Julia 1.6](https://julialang.org/downloads/) or later, preferably the current stable release. You can add Flux using Julia's package manager, by typing `] add Flux` in the Julia prompt. This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) for Nvidia GPU support.
 
-This will automatically install several other packages, including [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) which supports Nvidia GPUs. To directly access some of its functionality, you may want to add `] add CUDA` too. The page on [GPU support](gpu.md) has more details.
+### Learning Flux
 
-Other closely associated packages, also installed automatically, include [Zygote](https://github.com/FluxML/Zygote.jl), [Optimisers](https://github.com/FluxML/Optimisers.jl), [NNlib](https://github.com/FluxML/NNlib.jl), [Functors](https://github.com/FluxML/Functors.jl) and [MLUtils](https://github.com/JuliaML/MLUtils.jl).
+The **[quick start](@ref man-quickstart)** page trains a simple neural network.
 
-## Learning Flux
+The rest of the **guide** provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](@ref man-overview). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts.
 
-The [quick start](@ref man-quickstart) page trains a simple neural network.
+There are some **tutorials** about building particular models. The **[model zoo](https://github.com/FluxML/model-zoo/)** has starting points for many other common ones. And finally, the **[ecosystem page](ecosystem.md)** lists packages which define Flux models.
 
-This rest of this documentation provides a from-scratch introduction to Flux's take on models and how they work, starting with [fitting a line](@ref man-overview). Once you understand these docs, congratulations, you also understand [Flux's source code](https://github.com/FluxML/Flux.jl), which is intended to be concise, legible and a good reference for more advanced concepts.
+The **reference** section includes, besides Flux's own functions, those of some companion packages: [Zygote.jl](https://github.com/FluxML/Zygote.jl) (automatic differentiation), [Optimisers.jl](https://github.com/FluxML/Optimisers.jl) (training) and others.
 
-Sections with 📚 contain API listings. The same text is avalable at the Julia prompt, by typing for example `?gpu`.
-
-If you just want to get started writing models, the [model zoo](https://github.com/FluxML/model-zoo/) gives good starting points for many common ones.
-
-## Community
+### Community
 
 Everyone is welcome to join our community on the [Julia discourse forum](https://discourse.julialang.org/), or the [slack chat](https://discourse.julialang.org/t/announcing-a-julia-slack/4866) (channel #machine-learning). If you have questions or issues we'll try to help you out.
 
diff --git a/docs/src/models/basics.md b/docs/src/models/basics.md
index d1335ff229..140ed7e13d 100644
--- a/docs/src/models/basics.md
+++ b/docs/src/models/basics.md
@@ -213,13 +213,13 @@ m(5) # => 26
 
 ## Layer Helpers
 
-There is still one problem with this `Affine` layer, that Flux does not know to look inside it. This means that [`Flux.train!`](@ref) won't see its parameters, nor will [`gpu`](@ref) be able to move them to your GPU. These features are enabled by the `@functor` macro:
+There is still one problem with this `Affine` layer, that Flux does not know to look inside it. This means that [`Flux.train!`](@ref) won't see its parameters, nor will [`gpu`](@ref) be able to move them to your GPU. These features are enabled by the [`@functor`](@ref Functors.@functor) macro:
 
 ```
 Flux.@functor Affine
 ```
 
-Finally, most Flux layers make bias optional, and allow you to supply the function used for generating random weights. We can easily add these refinements to the `Affine` layer as follows:
+Finally, most Flux layers make bias optional, and allow you to supply the function used for generating random weights. We can easily add these refinements to the `Affine` layer as follows, using the helper function [`create_bias`](@ref Flux.create_bias):
 
 ```
 function Affine((in, out)::Pair; bias=true, init=Flux.randn32)
@@ -230,7 +230,3 @@ end
 
 Affine(3 => 1, bias=false, init=ones) |> gpu
 ```
-
-```@docs
-Functors.@functor
-```
diff --git a/docs/src/models/nnlib.md b/docs/src/models/nnlib.md
index cf42cc99bf..72b8481f56 100644
--- a/docs/src/models/nnlib.md
+++ b/docs/src/models/nnlib.md
@@ -1,6 +1,6 @@
 # Neural Network primitives from NNlib.jl
 
-Flux re-exports all of the functions exported by the [NNlib](https://github.com/FluxML/NNlib.jl) package. This includes activation functions, described on the next page. Many of the functions on this page exist primarily as the internal implementation of Flux layer, but can also be used independently.
+Flux re-exports all of the functions exported by the [NNlib](https://github.com/FluxML/NNlib.jl) package. This includes activation functions, described on [their own page](@ref man-activations). Many of the functions on this page exist primarily as the internal implementation of Flux layers, but can also be used independently.
 
 ## Softmax
 
diff --git a/docs/src/tutorials/2021-10-14-vanilla-gan.md b/docs/src/tutorials/2021-10-14-vanilla-gan.md
index b2c7bb6f4c..5b09345db8 100644
--- a/docs/src/tutorials/2021-10-14-vanilla-gan.md
+++ b/docs/src/tutorials/2021-10-14-vanilla-gan.md
@@ -32,7 +32,7 @@ type `add MLDatasets` or perform this operation with the Pkg module like this
 > Pkg.add(MLDatasets)
 ```
 
-While [UnicodePlots]() is not necessary, it can be used to plot generated samples
+While [UnicodePlots](https://github.com/JuliaPlots/UnicodePlots.jl) is not necessary, it can be used to plot generated samples
 into the terminal during training. Having direct feedback, instead of looking
 at plots in a separate window, use fantastic for debugging.