From b41db2ff5791d53e669e641a823effa957c42ff6 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 11 Jun 2024 13:03:32 +0200 Subject: [PATCH] Add dev guide to docs (#317) * Add dev guide to docs * Fix typo * Fix cleanup --- .github/workflows/DocPreviewCleanup.yml | 8 +- DifferentiationInterface/docs/make.jl | 2 +- DifferentiationInterface/docs/src/backends.md | 14 -- .../docs/src/dev_guide.md | 137 ++++++++++++++++++ .../docs/src/operators.md | 7 +- .../docs/src/preparation.md | 53 ------- 6 files changed, 150 insertions(+), 71 deletions(-) create mode 100644 DifferentiationInterface/docs/src/dev_guide.md delete mode 100644 DifferentiationInterface/docs/src/preparation.md diff --git a/.github/workflows/DocPreviewCleanup.yml b/.github/workflows/DocPreviewCleanup.yml index 36f8569a5..7c125b254 100644 --- a/.github/workflows/DocPreviewCleanup.yml +++ b/.github/workflows/DocPreviewCleanup.yml @@ -14,6 +14,12 @@ jobs: runs-on: ubuntu-latest permissions: contents: write + strategy: + fail-fast: false + matrix: + package: + - DifferentiationInterface + - DifferentiationInterfaceTest steps: - name: Checkout gh-pages branch uses: actions/checkout@v4 @@ -30,4 +36,4 @@ jobs: git push --force origin gh-pages-new:gh-pages fi env: - preview_dir: previews/PR${{ github.event.number }} \ No newline at end of file + preview_dir: ${{ matrix.package }}/previews/PR${{ github.event.number }} \ No newline at end of file diff --git a/DifferentiationInterface/docs/make.jl b/DifferentiationInterface/docs/make.jl index e2ab05706..4fcfd0ace 100644 --- a/DifferentiationInterface/docs/make.jl +++ b/DifferentiationInterface/docs/make.jl @@ -37,7 +37,7 @@ makedocs(; "Home" => "index.md", "Tutorials" => ["tutorial1.md", "tutorial2.md"], "Reference" => ["operators.md", "backends.md", "api.md"], - "Advanced" => ["preparation.md", "overloads.md"], + "Advanced" => ["dev_guide.md", "overloads.md"], ], checkdocs=:exports, plugins=[links], diff --git a/DifferentiationInterface/docs/src/backends.md b/DifferentiationInterface/docs/src/backends.md index eeb2b8285..ff2bfe264 100644 --- a/DifferentiationInterface/docs/src/backends.md +++ b/DifferentiationInterface/docs/src/backends.md @@ -100,17 +100,3 @@ The wrapper [`DifferentiateWith`](@ref) allows you to switch between backends. It takes a function `f` and specifies that `f` should be differentiated with the backend of your choice, instead of whatever other backend the code is trying to use. In other words, when someone tries to differentiate `dw = DifferentiateWith(f, backend1)` with `backend2`, then `backend1` steps in and `backend2` does nothing. At the moment, `DifferentiateWith` only works when `backend2` supports [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl). - -## Defining your own - -To work with DifferentiationInterface.jl, a new AD system would need to create an object subtyping [`ADTypes.AbstractADType`](@extref ADTypes). -In addition, some low-level operators would need to be defined at the very least: - -| backend subtype | pushforward necessary | pullback necessary | -| :---------------------------------------- | :-------------------- | :----------------- | -| [`ADTypes.ForwardMode`](@extref ADTypes) | yes | no | -| [`ADTypes.ReverseMode`](@extref ADTypes) | no | yes | -| [`ADTypes.SymbolicMode`](@extref ADTypes) | yes | yes | - -Every backend we support corresponds to a package extension of DifferentiationInterface.jl (located in the `ext` subfolder). 
-If you need to implement your own backend, take a look in there for inspiration, or reach out to us in the GitHub issues. diff --git a/DifferentiationInterface/docs/src/dev_guide.md b/DifferentiationInterface/docs/src/dev_guide.md new file mode 100644 index 000000000..1c87a1f5b --- /dev/null +++ b/DifferentiationInterface/docs/src/dev_guide.md @@ -0,0 +1,137 @@ +# Dev guide + +This page is important reading if you want to contribute to DifferentiationInterface.jl. +It is not part of the public API. + +!!! warning + The content below may become outdated, in which case you should refer to the source code as the ground truth. + +## General principles + +The package is structured around 8 [operators](@ref Operators): + +- [`derivative`](@ref) +- [`second_derivative`](@ref) +- [`gradient`](@ref) +- [`jacobian`](@ref) +- [`hessian`](@ref) +- [`pushforward`](@ref) +- [`pullback`](@ref) +- [`hvp`](@ref) + +Most operators have 4 variants, which look like this in the first order: `operator`, `operator!`, `value_and_operator`, `value_and_operator!`. + +### New operator + +To implement a new operator for an existing backend, you need to write 5 methods: 1 for [preparation](@ref Preparation) and 4 corresponding to the variants of the operator (see above). +In some cases, a subset of those methods will be enough, but most of the time, forgetting one will trigger errors. +For first-order operators, you may also want to support [two-argument functions](@ref "Mutation and signatures"), which requires another 5 methods (defined on `f!` instead of `f`). + +The method `prepare_operator` must output an `extras` object of the correct type. +For instance, `prepare_gradient(f, backend, x)` must return a [`DifferentiationInterface.GradientExtras`](@ref). +Assuming you don't need any preparation for said operator, you can use the trivial extras that are already defined, like `DifferentiationInterface.NoGradientExtras`. +Otherwise, define a custom struct like `MyGradientExtras <: DifferentiationInterface.GradientExtras` and put the necessary storage in there. + +### New backend + +If you want to implement a new backend, for instance because you developed a new AD package called `SuperDiff`, please open a pull request to DifferentiationInterface.jl. +Your AD package needs to be registered first. + +#### Core code + +In the main package, you should define a new struct `SuperDiffBackend` which subtypes [`ADTypes.AbstractADType`](@extref ADTypes), and endow it with the fields you need to parametrize your differentiation routines. +You also have to define [`ADTypes.mode`](@extref) and [`DifferentiationInterface.twoarg_support`](@ref) on `SuperDiffBackend`. + +!!! info + In the end, this backend struct will need to be contributed to [ADTypes.jl](https://github.com/SciML/ADTypes.jl). + However, putting it in the DifferentiationInterface.jl PR is a good first step for debugging. + +In a [package extension](https://pkgdocs.julialang.org/v1/creating-packages/#Conditional-loading-of-code-in-packages-(Extensions)) named `DifferentiationInterfaceSuperDiffExt`, you need to implement at least [`pushforward`](@ref) or [`pullback`](@ref) (and their variants). 
+The exact requirements depend on the differentiation mode you chose: + +| backend mode | pushforward necessary | pullback necessary | +| :------------------------------------------------ | :-------------------- | :----------------- | +| [`ADTypes.ForwardMode`](@extref ADTypes) | yes | no | +| [`ADTypes.ReverseMode`](@extref ADTypes) | no | yes | +| [`ADTypes.ForwardOrReverseMode`](@extref ADTypes) | yes | yes | +| [`ADTypes.SymbolicMode`](@extref ADTypes) | yes | yes | + +Every other operator can be deduced from these two, but you can gain efficiency by implementing additional operators. + +#### Tests and docs + +Once that is done, you need to add your new backend to the test suite. +Test files should be gathered in a folder named `SuperDiff` inside [`DifferentiationInterface/test/Single`](https://github.com/gdalle/DifferentiationInterface.jl/tree/main/DifferentiationInterface/test/Single). +They should use [DifferentiationInterfaceTest.jl](https://github.com/gdalle/DifferentiationInterface.jl/tree/main/DifferentiationInterfaceTest) to check correctness against the default scenarios. +Take inspiration from the tests of other backends to write your own. +To activate tests in CI, modify the [test workflow](https://github.com/gdalle/DifferentiationInterface.jl/blob/main/.github/workflows/Test.yml) and add your package to the list. +To run the tests locally, replace the following line in [`DifferentiationInterface/test/runtests.jl`](https://github.com/gdalle/DifferentiationInterface.jl/blob/main/DifferentiationInterface/test/runtests.jl) + +```julia +GROUP = get(ENV, "JULIA_DI_TEST_GROUP", "All") +``` + +with the much cheaper version + +```julia +GROUP = get(ENV, "JULIA_DI_TEST_GROUP", "Single/SuperDiff") +``` + +but don't forget to switch it back before pushing. + +Finally, you need to add your backend to the documentation, modifying every page that involves a list of backends. +That includes the README. + +## Specific details + +Here we give some more information on the contents of the extension for each backend. + +### ChainRulesCore + +For [`pullback`](@ref), same-point preparation runs the forward sweep and returns the pullback closure. + +### Enzyme + +In forward mode, for [`gradient`](@ref) and [`jacobian`](@ref), preparation chooses a number of chunks. + +### FastDifferentiation + +Preparation generates an [executable function](https://brianguenter.github.io/FastDifferentiation.jl/stable/makefunction/) from the symbolic expression of the differentiated function. + +!!! warning + Preparation can be very slow for symbolic AD. + +### FiniteDiff + +Whenever possible, preparation creates a cache object. + +### ForwardDiff + +Wherever possible, preparation creates a [config](https://juliadiff.org/ForwardDiff.jl/stable/user/api/#Preallocating/Configuring-Work-Buffers). +For [`pushforward`](@ref), preparation allocates the necessary space for `Dual` number computations. + +### ReverseDiff + +Wherever possible, preparation records a [tape](https://juliadiff.org/ReverseDiff.jl/dev/api/#The-AbstractTape-API) of the function's execution. + +!!! warning + This tape is specific to the control flow inside the function, and cannot be reused if the control flow is value-dependent (like `if x[1] > 0`). + +### Symbolics + +Preparation generates an [executable function](https://docs.sciml.ai/Symbolics/stable/manual/build_function/) from the symbolic expression of the differentiated function. + +!!! warning + Preparation can be very slow for symbolic AD. 
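+
+As a rough, non-authoritative sketch (assuming `AutoSymbolics` from ADTypes.jl and the extras-last operator signatures described on the operators page), this is how the one-time symbolic cost gets amortized by reusing the prepared `extras`:
+
+```julia
+using DifferentiationInterface
+using ADTypes: AutoSymbolics
+import Symbolics  # triggers the Symbolics extension of DifferentiationInterface
+
+f(x) = sum(abs2, x)
+x = rand(10)
+backend = AutoSymbolics()
+
+# Slow step: trace f symbolically and compile an executable gradient function
+extras = prepare_gradient(f, backend, x)
+
+# Fast steps: reuse the compiled function for inputs of the same type and size
+grad = gradient(f, backend, x, extras)
+grad = gradient(f, backend, 2 .* x, extras)
+```
+
+The same reasoning applies to FastDifferentiation, which carries the identical preparation warning above.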
+ +### Tapir + +For [`pullback`](@ref), preparation [builds the reverse rule](https://github.com/withbayes/Tapir.jl?tab=readme-ov-file#how-it-works) of the function. + +### Tracker + +For [`pullback`](@ref), same-point preparation runs the forward sweep and returns the pullback closure at `x`. + +### Zygote + +For [`pullback`](@ref), same-point preparation runs the forward sweep and returns the pullback closure at `x`. diff --git a/DifferentiationInterface/docs/src/operators.md b/DifferentiationInterface/docs/src/operators.md index a06dcd053..97230c6fe 100644 --- a/DifferentiationInterface/docs/src/operators.md +++ b/DifferentiationInterface/docs/src/operators.md @@ -58,11 +58,14 @@ Several variants of each operator are defined. ## Mutation and signatures -We support two types of functions: +Some operators support two types of functions: - one-argument functions `f(x) = y` - two-argument functions `f!(y, x) = nothing` +!!! warning + Only [`pushforward`](@ref), [`pullback`](@ref), [`derivative`](@ref) and [`jacobian`](@ref) support two-argument functions at the moment. + The same operators are defined for both cases, but they have different signatures (they take different arguments): | signature | out-of-place | in-place | @@ -75,7 +78,7 @@ The same operators are defined for both cases, but they have different signature This convention holds regardless of the bang `!` in the operator name. In particular, for two-argument functions `f!(y, x)`, every variant of every operator will mutate `y`. -## [Preparation](@id Operators-Preparation) +## Preparation ### Principle diff --git a/DifferentiationInterface/docs/src/preparation.md b/DifferentiationInterface/docs/src/preparation.md deleted file mode 100644 index 94a96df9a..000000000 --- a/DifferentiationInterface/docs/src/preparation.md +++ /dev/null @@ -1,53 +0,0 @@ -# Preparation - -Preparation is a backend-specific procedure which involves some subtleties. -Here we list the broad principles of preparation for each backend where it is nontrivial. - -The following is not part of the public API. - -!!! warning - This page may become outdated, in which case you should refer to the source code as the ground truth. - -## ChainRulesCore - -For [`pullback`](@ref), same-point preparation runs the forward sweep and returns the pullback closure. - -## Enzyme - -In forward mode, for [`gradient`](@ref) and [`jacobian`](@ref) - -## FastDifferentiation - -Preparation generates an [executable function](https://brianguenter.github.io/FastDifferentiation.jl/stable/makefunction/) from the symbolic expression of the differentiated function. - -## FiniteDiff - -Whenever possible, preparation creates a cache object. - -## ForwardDiff - -Wherever possible, preparation creates a [config](https://juliadiff.org/ForwardDiff.jl/stable/user/api/#Preallocating/Configuring-Work-Buffers) with all the necessary memory to use as buffer. -For [`pushforward`](@ref), preparation allocates the necessary space for `Dual` number computations. - -## ReverseDiff - -Wherever possible, preparation records a [tape](https://juliadiff.org/ReverseDiff.jl/dev/api/#The-AbstractTape-API) of the function's execution. - -!!! warning - This tape is specific to the control flow inside the function, and cannot be reused if the control flow is value-dependent (like `if x[1] > 0`). - -## Symbolics - -Preparation generates an [executable function](https://docs.sciml.ai/Symbolics/stable/manual/build_function/) from the symbolic expression of the differentiated function. 
- -## Tapir - -For [`pullback`](@ref), preparation [builds the reverse rule](https://github.com/withbayes/Tapir.jl?tab=readme-ov-file#how-it-works) of the function. - -## Tracker - -For [`pullback`](@ref), same-point preparation runs the forward sweep and returns the pullback closure at `x`. - -## Zygote - -For [`pullback`](@ref), same-point preparation runs the forward sweep and returns the pullback closure at `x`.