From a96674d16835bcafa9c2df9b3295e7ce8351daf0 Mon Sep 17 00:00:00 2001 From: Adrian Hill Date: Tue, 12 Mar 2024 18:49:09 +0100 Subject: [PATCH] Improve documentation (#34) * Improve documentation * Rename source files * More tables in README * Split docs into user and dev docs * Add Fallback call structure diagrams * Improve Mermaid diagrams * Fix typos * Fix API ref * No duplicates * Reorder stuff --------- Co-authored-by: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> --- README.md | 18 ++-- docs/Project.toml | 1 + docs/make.jl | 5 +- docs/src/api.md | 24 +++--- docs/src/design.md | 57 ------------- docs/src/developer.md | 92 +++++++++++++++++++++ docs/src/getting_started.md | 51 ++++++++++++ src/DifferentiationInterface.jl | 8 +- src/{scalar_scalar.jl => derivative.jl} | 0 src/{array_scalar.jl => gradient.jl} | 0 src/{array_array.jl => jacobian.jl} | 4 +- src/{scalar_array.jl => multiderivative.jl} | 0 12 files changed, 174 insertions(+), 86 deletions(-) delete mode 100644 docs/src/design.md create mode 100644 docs/src/developer.md create mode 100644 docs/src/getting_started.md rename src/{scalar_scalar.jl => derivative.jl} (100%) rename src/{array_scalar.jl => gradient.jl} (100%) rename src/{array_array.jl => jacobian.jl} (99%) rename src/{scalar_array.jl => multiderivative.jl} (100%) diff --git a/README.md b/README.md index e49d159bf..72993d831 100644 --- a/README.md +++ b/README.md @@ -17,14 +17,16 @@ It supports in-place versions of every operator, and ensures type stability when We support some of the backends defined by [ADTypes.jl](https://github.com/SciML/ADTypes.jl): -- [ChainRulesCore.jl](https://github.com/JuliaDiff/ChainRulesCore.jl) with `AutoChainRules(ruleconfig)` -- [Diffractor.jl](https://github.com/JuliaDiff/Diffractor.jl) with `AutoDiffractor()` -- [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl) with `AutoEnzyme(Val(:forward))` or `AutoEnzyme(Val(:reverse))` -- [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) with `AutoFiniteDiff()` -- [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) with `AutoForwardDiff()` -- [PolyesterForwardDiff.jl](https://github.com/JuliaDiff/PolyesterForwardDiff.jl) with `AutoPolyesterForwardDiff(; chunksize=C)` -- [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) with `AutoReverseDiff()` -- [Zygote.jl](https://github.com/FluxML/Zygote.jl) with `AutoZygote()` +| Backend | Type | +|:--------------------------------------------------------------------------------|:-----------------------------------------------------------| +| [ChainRulesCore.jl](https://github.com/JuliaDiff/ChainRulesCore.jl) | `AutoChainRules(ruleconfig)` | +| [Diffractor.jl](https://github.com/JuliaDiff/Diffractor.jl) | `AutoDiffractor()` | +| [Enzyme.jl](https://github.com/EnzymeAD/Enzyme.jl) | `AutoEnzyme(Val(:forward))` or `AutoEnzyme(Val(:reverse))` | +| [FiniteDiff.jl](https://github.com/JuliaDiff/FiniteDiff.jl) | `AutoFiniteDiff()` | +| [ForwardDiff.jl](https://github.com/JuliaDiff/ForwardDiff.jl) | `AutoForwardDiff()` | +| [PolyesterForwardDiff.jl](https://github.com/JuliaDiff/PolyesterForwardDiff.jl) | `AutoPolyesterForwardDiff(; chunksize=C)` | +| [ReverseDiff.jl](https://github.com/JuliaDiff/ReverseDiff.jl) | `AutoReverseDiff()` | +| [Zygote.jl](https://github.com/FluxML/Zygote.jl) | `AutoZygote()` | ## Example diff --git a/docs/Project.toml b/docs/Project.toml index 43ae050ca..aae07b52d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -5,6 +5,7 @@ DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" 
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Diffractor = "9f5e2b26-1114-432f-b630-d3fe2085c51c" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterMermaid = "a078cd44-4d9c-4618-b545-3ab9d77f9177" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" diff --git a/docs/make.jl b/docs/make.jl index d485c686a..92ebe4ca1 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,6 +2,7 @@ using Base: get_extension using DifferentiationInterface import DifferentiationInterface as DI using Documenter +using DocumenterMermaid using ADTypes using Diffractor: Diffractor @@ -64,7 +65,9 @@ makedocs(; canonical="https://gdalle.github.io/DifferentiationInterface.jl", edit_link="main", ), - pages=["Home" => "index.md", "design.md", "api.md", "backends.md"], + pages=[ + "Home" => "index.md", "getting_started.md", "api.md", "backends.md", "developer.md" + ], warnonly=:missing_docs, # missing docs for ADTypes.jl are normal ) diff --git a/docs/src/api.md b/docs/src/api.md index 0d74648c4..b33c57f4f 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -9,46 +9,42 @@ CollapsedDocStrings = true DifferentiationInterface ``` -## Utilities - -### Scalar to scalar +## Derivative ```@autodocs Modules = [DifferentiationInterface] -Pages = ["scalar_scalar.jl"] +Pages = ["src/derivative.jl"] ``` -### Scalar to array +## Multiderivative ```@autodocs Modules = [DifferentiationInterface] -Pages = ["scalar_array.jl"] +Pages = ["multiderivative.jl"] ``` -### Array to scalar +## Gradient ```@autodocs Modules = [DifferentiationInterface] -Pages = ["array_scalar.jl"] +Pages = ["gradient.jl"] ``` -### Array to array +## Jacobian ```@autodocs Modules = [DifferentiationInterface] -Pages = ["array_array.jl"] +Pages = ["jacobian.jl"] ``` -## Primitives - -### Pushforward +## Pushforward (JVP) ```@autodocs Modules = [DifferentiationInterface] Pages = ["pushforward.jl"] ``` -### Pullback +## Pullback (JVP) ```@autodocs Modules = [DifferentiationInterface] diff --git a/docs/src/design.md b/docs/src/design.md deleted file mode 100644 index f737cadb8..000000000 --- a/docs/src/design.md +++ /dev/null @@ -1,57 +0,0 @@ -# Design - -The operators defined in this package are split into two main parts: - -- the "utilities", which are sufficient for most users -- the "primitives", which are mostly relevant for experts or backend developers - -## Utilities - -Depending on the type of input and output, differentiation operators can have various names. -We choose the following terminology for the utilities we provide: - -| | **scalar output** | **array output** | -| ---------------- | ----------------- | ---------------- | -| **scalar input** | derivative | multiderivative | -| **array input** | gradient | jacobian | - -Most backends have custom implementations for all of these, which we reuse whenever possible. 
- -## Primitives - -Every utility can also be implemented from either of these two primitives: - -- the pushforward (in forward mode), computing a Jacobian-vector product -- the pullback (in reverse mode), computing a vector-Jacobian product - -## Variants - -Whenever it makes sense, four variants of the same operator are defined: - -| | **mutating** | **non-mutating** | -| --------------------- | ---------------------------------------- | ------------------------------ | -| **primal too** | `value_and_something!(storage, args...)` | `value_and_something(args...)` | -| **differential only** | `something!(storage, args...)` | `something(args...)` | - -Replace `something` with `derivative`, `multiderivative`, `gradient`, `jacobian`, `pushforward` or `pullback` to get the correct name. - -## Preparation - -In many cases, automatic differentiation can be accelerated if the function has been run at least once (e.g. to record a tape) and if some cache objects are provided. -This is a backend-specific procedure, but we expose a common syntax to achieve it. - -If you run `prepare_something(backend, f, x)`, it will create an object called `extras` containing the necessary information to speed up the `something` procedure and its variants. -You can them call `something(backend, f, x, extras)`, which should be faster than `something(backend, f, x)`. -This is especially worth it if you plan to call `something` several times in similar settings: same backend, same function, but different inputs. -You can think of it as a warm up. - -By default, all the preparation functions return `nothing`. -We do not make any guarantees on their implementation for each backend, or on the performance gains that can be expected. - -## Backend requirements - -The only requirement for a backend is to implement either [`value_and_pushforward!`](@ref) or [`value_and_pullback!`](@ref), from which the rest of the operators can be deduced. -We provide a standard series of fallbacks, but we leave it to each backend to redefine as many of the utilities as necessary to achieve optimal performance. - -Every backend we support corresponds to a package extension of DifferentiationInterface.jl (located in the `ext` subfolder). -Advanced users are welcome to code more backends and submit pull requests! diff --git a/docs/src/developer.md b/docs/src/developer.md new file mode 100644 index 000000000..7ff9dd50f --- /dev/null +++ b/docs/src/developer.md @@ -0,0 +1,92 @@ +# For AD developers + +## Backend requirements + +Every [operator](@ref operators) can be implemented from either of these two primitives: + +- the pushforward (in forward mode), computing a Jacobian-vector product +- the pullback (in reverse mode), computing a vector-Jacobian product + +The only requirement for a backend is therefore to implement either [`value_and_pushforward!`](@ref) or [`value_and_pullback!`](@ref), from which the rest of the operators can be deduced. +We provide a standard series of fallbacks, but we leave it to each backend to redefine as many of the utilities as necessary to achieve optimal performance. + +Every backend we support corresponds to a package extension of DifferentiationInterface.jl (located in the `ext` subfolder). +Advanced users are welcome to code more backends and submit pull requests! + +## Fallback call structure + +### Forward mode + +```mermaid +flowchart LR + subgraph Gradient + gradient --> value_and_gradient + value_and_gradient --> value_and_gradient! + gradient! --> value_and_gradient! 
+ end + + subgraph Jacobian + jacobian --> value_and_jacobian + value_and_jacobian --> value_and_jacobian! + jacobian! --> value_and_jacobian! + end + + subgraph Multiderivative + multiderivative --> value_and_multiderivative + value_and_multiderivative --> value_and_multiderivative! + multiderivative! --> value_and_multiderivative! + end + + subgraph Derivative + derivative --> value_and_derivative + end + + subgraph Pushforward + pushforward --> value_and_pushforward + value_and_pushforward --> value_and_pushforward! + pushforward! --> value_and_pushforward! + end + + value_and_jacobian! --> value_and_pushforward! + value_and_gradient! --> value_and_pushforward! + value_and_multiderivative! --> value_and_pushforward! + value_and_derivative --> value_and_pushforward +``` + +### Reverse mode + +```mermaid +flowchart LR + subgraph Gradient + gradient --> value_and_gradient + value_and_gradient --> value_and_gradient! + gradient! --> value_and_gradient! + end + + subgraph Jacobian + jacobian --> value_and_jacobian + value_and_jacobian --> value_and_jacobian! + jacobian! --> value_and_jacobian! + end + + subgraph Multiderivative + multiderivative --> value_and_multiderivative + value_and_multiderivative --> value_and_multiderivative! + multiderivative! --> value_and_multiderivative! + end + + subgraph Derivative + derivative --> value_and_derivative + end + + subgraph Pullback + pullback --> value_and_pullback + value_and_pullback --> value_and_pullback! + pullback! --> value_and_pullback! + end + + value_and_jacobian! --> value_and_pullback! + value_and_gradient! --> value_and_pullback! + value_and_multiderivative! --> value_and_pullback! + value_and_derivative --> value_and_pullback +``` diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md new file mode 100644 index 000000000..ff7f9afeb --- /dev/null +++ b/docs/src/getting_started.md @@ -0,0 +1,51 @@ +# Getting started + +## [Operators](@id operators) + +Depending on the type of input and output, differentiation operators can have various names. +We choose the following terminology for the ones we provide: + +| | **scalar output** | **array output** | +| ---------------- | ----------------- | ----------------- | +| **scalar input** | `derivative` | `multiderivative` | +| **array input** | `gradient` | `jacobian` | + +Most backends have custom implementations for all of these, which we reuse whenever possible. 
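
As an editorial illustration (not part of the patch hunks), here is a minimal sketch of what calling each operator from the table above looks like, assuming the `operator(backend, f, x)` argument order used elsewhere in these docs, that the operators are exported as documented, and taking ForwardDiff as one example backend from the README table:

```julia
using DifferentiationInterface
using ADTypes: AutoForwardDiff
using ForwardDiff: ForwardDiff  # load the backend package so its extension is active

backend = AutoForwardDiff()

derivative(backend, x -> x^2, 3.0)                 # scalar input, scalar output
multiderivative(backend, x -> [x, x^2], 3.0)       # scalar input, array output
gradient(backend, x -> sum(abs2, x), [1.0, 2.0])   # array input, scalar output
jacobian(backend, x -> x .^ 2, [1.0, 2.0, 3.0])    # array input, array output
```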
+ +### Variants + +Whenever it makes sense, four variants of the same operator are defined: + +| **Operator** | **non-mutating** | **mutating** | **non-mutating with primal** | **mutating with primal** | +| :---------------- | :------------------------ | :------------------------- | :---------------------------------- | :----------------------------------- | +| Derivative | [`derivative`](@ref) | N/A | [`value_and_derivative`](@ref) | N/A | +| Multiderivative | [`multiderivative`](@ref) | [`multiderivative!`](@ref) | [`value_and_multiderivative`](@ref) | [`value_and_multiderivative!`](@ref) | +| Gradient | [`gradient`](@ref) | [`gradient!`](@ref) | [`value_and_gradient`](@ref) | [`value_and_gradient!`](@ref) | +| Jacobian | [`jacobian`](@ref) | [`jacobian!`](@ref) | [`value_and_jacobian`](@ref) | [`value_and_jacobian!`](@ref) | +| Pushforward (JVP) | [`pushforward`](@ref) | [`pushforward!`](@ref) | [`value_and_pushforward`](@ref) | [`value_and_pushforward!`](@ref) | +| Pullback (VJP) | [`pullback`](@ref) | [`pullback!`](@ref) | [`value_and_pullback`](@ref) | [`value_and_pullback!`](@ref) | + +Note that scalar outputs can't be mutated, which is why `derivative` doesn't have mutating variants. + +## Preparation + +In many cases, automatic differentiation can be accelerated if the function has been run at least once (e.g. to record a tape) and if some cache objects are provided. +This is a backend-specific procedure, but we expose a common syntax to achieve it. + +| **Operator** | **preparation function** | +| :---------------- | :-------------------------------- | +| Derivative | [`prepare_derivative`](@ref) | +| Multiderivative | [`prepare_multiderivative`](@ref) | +| Gradient | [`prepare_gradient`](@ref) | +| Jacobian | [`prepare_jacobian`](@ref) | +| Pushforward (JVP) | [`prepare_pushforward`](@ref) | +| Pullback (VJP) | [`prepare_pullback`](@ref) | + +If you run `prepare_operator(backend, f, x)`, it will create an object called `extras` containing the necessary information to speed up `operator` and its variants. +This information is specific to `backend` and `f`, as well as the _type and size_ of the input `x`, but it should work with different _values_ of `x`. + +You can then call `operator(backend, f, similar_x, extras)`, which should be faster than `operator(backend, f, similar_x)`. +This is especially worth it if you plan to call `operator` several times in similar settings: you can think of it as a warm up. + +By default, all the preparation functions return `nothing`. +We do not make any guarantees on their implementation for each backend, or on the performance gains that can be expected. 
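
To make the preparation workflow concrete, here is an editorial sketch (not part of the patch). It assumes the calling conventions stated above: storage comes first in the mutating variant, and the `extras` object is appended as the last argument after preparation; the backend choice is again arbitrary.

```julia
using DifferentiationInterface
using ADTypes: AutoForwardDiff
using ForwardDiff: ForwardDiff  # load the backend package so its extension is active

f(x) = sum(abs2, x)
backend = AutoForwardDiff()

x = rand(10)
extras = prepare_gradient(backend, f, x)  # may simply return `nothing` for some backends

grad = similar(x)
for _ in 1:100
    x = rand(10)  # same type and size as before, only the values change
    value_and_gradient!(grad, backend, f, x, extras)  # reuses the preparation work
end
```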
diff --git a/src/DifferentiationInterface.jl b/src/DifferentiationInterface.jl index cce87e560..8457d2007 100644 --- a/src/DifferentiationInterface.jl +++ b/src/DifferentiationInterface.jl @@ -19,10 +19,10 @@ include("mode.jl") include("utils.jl") include("pushforward.jl") include("pullback.jl") -include("scalar_scalar.jl") -include("scalar_array.jl") -include("array_scalar.jl") -include("array_array.jl") +include("derivative.jl") +include("multiderivative.jl") +include("gradient.jl") +include("jacobian.jl") include("prepare.jl") export value_and_pushforward!, value_and_pushforward diff --git a/src/scalar_scalar.jl b/src/derivative.jl similarity index 100% rename from src/scalar_scalar.jl rename to src/derivative.jl diff --git a/src/array_scalar.jl b/src/gradient.jl similarity index 100% rename from src/array_scalar.jl rename to src/gradient.jl diff --git a/src/array_array.jl b/src/jacobian.jl similarity index 99% rename from src/array_array.jl rename to src/jacobian.jl index 62f65ea4a..eae106f22 100644 --- a/src/array_array.jl +++ b/src/jacobian.jl @@ -2,7 +2,7 @@ const JAC_NOTES = """ ## Notes Regardless of the shape of `x` and `y`, if `x` has length `n` and `y` has length `m`, then `jac` is expected to be a `m × n` matrix. -This function acts as if the input and output had been flattened with `vec`. +This function acts as if the input and output had been flattened with `vec`. """ """ @@ -57,7 +57,7 @@ end Compute the primal value `y = f(x)` and the Jacobian matrix `jac = ∂f(x)` of an array-to-array function. -$JAC_NOTES +$JAC_NOTES """ function value_and_jacobian(backend::AbstractADType, f, x::AbstractArray, args...) y = f(x) diff --git a/src/scalar_array.jl b/src/multiderivative.jl similarity index 100% rename from src/scalar_array.jl rename to src/multiderivative.jl
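
One editorial note on the `JAC_NOTES` convention touched in the `jacobian.jl` hunk above: regardless of the shapes of `x` and `y`, the Jacobian is a `length(y) × length(x)` matrix, as if both arrays had been flattened with `vec`. A short sketch of what that means in practice (backend and values chosen arbitrarily, not part of the patch):

```julia
using DifferentiationInterface
using ADTypes: AutoForwardDiff
using ForwardDiff: ForwardDiff  # load the backend package so its extension is active
using LinearAlgebra: Diagonal

f(x) = x .^ 2        # array-to-array, elementwise square
x = rand(2, 3)       # 2×3 input, so length(x) == 6

y, jac = value_and_jacobian(AutoForwardDiff(), f, x)

size(jac) == (length(y), length(x))  # (6, 6), even though x and y are 2×3 arrays
jac ≈ Diagonal(2 .* vec(x))          # d(xᵢ²)/dxᵢ = 2xᵢ along the vec ordering
```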