From c99b0c441296d1d043407b6f7a467979481696d4 Mon Sep 17 00:00:00 2001 From: PetroZarytskyi <119341518+PetroZarytskyi@users.noreply.github.com> Date: Tue, 18 Feb 2025 06:58:49 +0100 Subject: [PATCH] Update the documentation for clad::jacobian (#1242) --- README.md | 65 +++++++++++++++++++------ demos/Jupyter/Intro.ipynb | 10 ++-- docs/userDocs/source/user/UsingClad.rst | 17 ++++--- docs/userDocs/source/user/reference.rst | 18 ++++--- docs/userDocs/source/user/tutorials.rst | 23 ++++----- 5 files changed, 86 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 06d0e7bf9..7ace41cff 100644 --- a/README.md +++ b/README.md @@ -134,8 +134,7 @@ int main() { ### Jacobian mode - `clad::jacobian` -Clad can produce the jacobian of a function using its reverse mode. It returns the jacobian matrix as a flattened -vector in row major format. +Clad can produce the jacobian of a function using its reverse mode. It returns the jacobian matrix as a `clad::matrix` for every pointer/array parameter. `clad::jacobian(f, /*optional*/ ARGS)` takes 1 or 2 arguments: 1. `f` is a pointer to a function or a method to be differentiated @@ -143,9 +142,9 @@ vector in row major format. * not provided, then `f` is differentiated w.r.t. its every argument * a string literal with comma-separated names of independent variables (e.g. `"x"` or `"y"` or `"x, y"` or `"y, x"`) -The generated function has `void` return type and same input arguments. The function has an additional argument of -type `T *`, where `T` is the pointee type of the output (the last variable) of `f`. This variable stores the jacobian -matrix. *The caller is responsible for allocating and zeroing-out the jacobian storage*. Example: +The generated function has `void` return type and same input arguments. For every pointer/array parameter `arr`, the function has an additional argument `_d_vector_arr`. Its +type is `clad::matrix<T>`, where `T` is the pointee type of `arr`. These variables store their derivatives w.r.t. 
all inputs. +*The caller is responsible for allocating the matrices*. Example: ```cpp #include "clad/Differentiator/Differentiator.h" @@ -158,19 +157,57 @@ void h(double a, double b, double output[]) { } int main() { - // This sets all the input variables (i.e a and b) as independent variables + // This sets all the input variables (i.e a, b, and output) as independent variables auto h_jac = clad::jacobian(h); - // The jacobian matrix size should be the number of - // independent variables * the number of outputs of the original function - // In this case it is 2 * 3 = 6 - double jac[6] = {0}; + // The jacobian matrix size should be + // the size of the output x the number of independent variables + // In this case it is 3 x (1 + 1 + 3) + clad::matrix<double> d_output(3, 5); double output[3] = {0}; - h_jac.execute(/*a=*/3, /*b=*/4, output, jac); + h_jac.execute(/*a=*/3, /*b=*/4, output, &d_output); + + // d_output[i][j] is the derivative of the i-th element of `output` w.r.t. the j-th input + std::cout << d_output[0][0] << " " << d_output[0][1] << std::endl + << d_output[1][0] << " " << d_output[1][1] << std::endl + << d_output[2][0] << " " << d_output[2][1] << std::endl; +} +``` + +Or in the case of multiple array parameters: + +```cpp +#include "clad/Differentiator/Differentiator.h" +#include <iostream> + +void h(double a, double b, double arr[], double* ptr) { + arr[0] = a * a * a; + ptr[0] = arr[0] + b * b * b; + arr[1] = 2 * (a + b); +} + +int main() { + auto h_jac = clad::jacobian(h); + + // The jacobian matrix size should be + // the size of the output x the number of independent variables + + // 2 x (1 + 1 + 2 + 1) + clad::matrix<double> d_arr(2, 5); + double arr[2] = {0}; + + // 1 x (1 + 1 + 2 + 1) + clad::matrix<double> d_ptr(1, 5); + double ptr[1] = {0}; + + h_jac.execute(/*a=*/3, /*b=*/4, arr, ptr, &d_arr, &d_ptr); - std::cout << jac[0] << " " << jac[1] << std::endl - << jac[2] << " " << jac[3] << std::endl - << jac[4] << " " << jac[5] << std::endl; + // d_arr[i][j] is the derivative of 
the i-th element of `arr` w.r.t. the j-th input + std::cout << d_arr[0][0] << " " << d_arr[0][1] << std::endl + << d_arr[1][0] << " " << d_arr[1][1] << std::endl; + + // Likewise, with `ptr` + std::cout << d_ptr[0][0] << " " << d_ptr[0][1] << std::endl; } ``` diff --git a/demos/Jupyter/Intro.ipynb b/demos/Jupyter/Intro.ipynb index 78df06f6d..361d03893 100644 --- a/demos/Jupyter/Intro.ipynb +++ b/demos/Jupyter/Intro.ipynb @@ -247,7 +247,7 @@ "id": "03638fa7-1837-4052-b548-0289098dbe12", "metadata": {}, "source": [ - "Clad can produce the jacobian of a function using its reverse mode. It returns the jacobian matrix as a flattened vector in row major format. The generated function has void return type and same input arguments. The function has an additional argument of type T\\*, where T is the pointee type of the output (the last variable) of *fn_jacobian*. This variable stores the jacobian matrix. The caller is responsible for allocating and zeroing-out the jacobian storage. " + "Clad can produce the jacobian of a function using its reverse mode. It returns the jacobian matrix as a `clad::matrix` for every pointer/array parameter. The generated function has `void` return type and same input arguments. For every pointer/array parameter `arr`, the function has an additional argument `_d_vector_arr`. Its type is `clad::matrix<T>`, where `T` is the pointee type of `arr`. These variables store their derivatives w.r.t. all inputs. The caller is responsible for allocating the matrices." 
] }, { @@ -273,8 +273,8 @@ "source": [ "auto d_fn = clad::jacobian(fn_jacobian);\n", "double res[3] = {0, 0, 0};\n", - "double derivatives[6] = {0, 0, 0, 0, 0, 0};\n", - "d_fn.execute(3, 5, res, derivatives);" + "clad::matrix d_res(3, 5);\n", + "d_fn.execute(3, 5, res, &d_res);" ] }, { @@ -295,7 +295,7 @@ } ], "source": [ - "derivatives" + "d_res" ] }, { @@ -319,7 +319,7 @@ "std::cout<<\"Jacobian matrix:\\n\";\n", " for (int i=0; i<3; ++i) {\n", " for (int j=0; j<2; ++j) {\n", - " std::cout< d_res(3, 5); // Prints the generated Hessian function jacobian.dump(); // Substitutes these values into the Jacobian function and pipes the result - // into the matrix variable. + // into the d_res variable. double res[3] = {0, 0, 0}; - jacobian.execute(3, 5, res, matrix); + jacobian.execute(3, 5, res, &d_res); + // Now, you can access the derivatives with d_res[i][j]. } Few important things to note through this example: - ``clad::jacobian`` supports differentiating w.r.t multiple paramters. - - The array that will store the computed jacobian matrix needs to be passed as the - last argument to ``CladFunction::execute`` call. The array size - needs to be greater or equal to the size required to store the jacobian matrix. - Passing an array of a smaller size will result in undefined behaviour. + - The clad::matrix args are generated for all array/pointer parameters. They need to be passed + after the original parameters to ``CladFunction::execute`` call. The size of every matrix + needs to be exactly the size required to store the derivative of the corresponding parameter + w.r.t. all input parameters. Passing a matrix of a different size will result in undefined behaviour. 
Array Support ---------------- diff --git a/docs/userDocs/source/user/reference.rst b/docs/userDocs/source/user/reference.rst index 1f6f23943..15e0dfd03 100644 --- a/docs/userDocs/source/user/reference.rst +++ b/docs/userDocs/source/user/reference.rst @@ -118,9 +118,9 @@ API reference `jacobian matrix `_ of the provided function (``fn``) with respect to all the arguments specified in ``args``. If no explicit ``args`` argument is specified, - then jacbian matrix is computed with respect to all the input parameters. + then the jacobian is computed with respect to all the input parameters. For a function with 3 input parameters and an output array of size 4, - the jacobian matrix will contain 12 elements. + the jacobian matrix (called ``_d_result``) will be 4 x 7 (output size x number of independent variables, i.e. 4 x (3 + 4)). :: @@ -135,14 +135,18 @@ API reference auto fn_jcbn = clad::jacobian(func); // Creates an empty matrix to store the Jacobian in - double matrix[6] = {0}; + clad::matrix<double> d_res(3, 5); double res[3] = {0}; - fn_jcbn.execute(8, 2, res, matrix); + fn_jcbn.execute(8, 2, res, &d_res); - //Result is 48, 64, 4, 32, 2, 8 - printf("Result is %g, %g, %g, %g, %g, %g \n", matrix[0], matrix[1], - matrix[2], matrix[3], matrix[4], matrix[5]); + //Result is 32 64 + // 4 2 + // 2 8 + printf("Result is \n %g %g \n %g %g \n %g %g \n", + d_res[0][0], d_res[0][1], + d_res[1][0], d_res[1][1], + d_res[2][0], d_res[2][1]); } ------------------ diff --git a/docs/userDocs/source/user/tutorials.rst b/docs/userDocs/source/user/tutorials.rst index 240559d43..0ea188630 100644 --- a/docs/userDocs/source/user/tutorials.rst +++ b/docs/userDocs/source/user/tutorials.rst @@ -95,8 +95,8 @@ Here the array variable stores the hessian matrix. **The Jacobian Mode** -Clad can produce Jacobian of a function using its reverse mode. It returns the -jacobian matrix as a flattened vector with elements arranged in row-major format. +Clad can produce the jacobian of a function using its reverse mode. 
It returns the +jacobian matrix as a ``clad::matrix`` for every pointer/array parameter. .. code-block:: cpp @@ -112,19 +112,16 @@ jacobian matrix as a flattened vector with elements arranged in row-major format int main() { auto f_jac = clad::jacobian(f); - double jac[9] = {0}; - double output[3] = {0}; - f_jac.execute(3, 4, 5, output, jac); - std::cout << jac[0] << " " << jac[1] << std::endl - << jac[2] << " " << jac[3] << std::endl - << jac[4] << " " << jac[5] << std::endl - << jac[6] << " " << jac[7] << std::endl - << jac[8] << std::endl; + clad::matrix<double> d_output(3, 6); + double output[3]; + f_jac.execute(3, 4, 5, output, &d_output); + std::cout << d_output[0][0] << " " << d_output[0][1] << " " << d_output[0][2] << std::endl + << d_output[1][0] << " " << d_output[1][1] << " " << d_output[1][2] << std::endl + << d_output[2][0] << " " << d_output[2][1] << " " << d_output[2][2] << std::endl; } -The jacobian matrix size should be equal to `no. of independent variables times -the number of outputs in the original function` in the above example it would be -an array of size 3x3 = 9. +The jacobian matrix size should be `the size of the output` x `no. of independent variables`. +In the above example, it would be 3 x 6 (1+1+1+3). **Error Estimation API**