From 2efdd176ce32c0a5ccfb06f08bb7fba736b17fdf Mon Sep 17 00:00:00 2001 From: Charles Beauville Date: Wed, 25 Sep 2024 15:15:47 +0200 Subject: [PATCH] ci(framework:skip) Add rST formatting to docs (#4254) --- dev/format.sh | 3 + dev/test.sh | 8 + ...or-explanation-public-and-private-apis.rst | 162 ++- ...contributor-how-to-build-docker-images.rst | 222 +-- ...ributor-how-to-contribute-translations.rst | 81 +- ...ow-to-develop-in-vscode-dev-containers.rst | 57 +- ...or-how-to-install-development-versions.rst | 35 +- .../contributor-how-to-release-flower.rst | 46 +- ...ontributor-how-to-set-up-a-virtual-env.rst | 42 +- ...contributor-how-to-write-documentation.rst | 10 +- ...ntributor-ref-good-first-contributions.rst | 34 +- ...butor-ref-secure-aggregation-protocols.rst | 253 ++-- ...tributor-tutorial-contribute-on-github.rst | 362 ++--- ...-tutorial-get-started-as-a-contributor.rst | 156 ++- doc/source/docker/enable-tls.rst | 218 +-- doc/source/docker/index.rst | 40 +- doc/source/docker/persist-superlink-state.rst | 55 +- doc/source/docker/pin-version.rst | 27 +- doc/source/docker/run-as-root-user.rst | 39 +- doc/source/docker/run-as-subprocess.rst | 60 +- ...run-quickstart-examples-docker-compose.rst | 127 +- .../docker/set-environment-variables.rst | 6 +- .../tutorial-deploy-on-multiple-machines.rst | 218 ++- .../tutorial-quickstart-docker-compose.rst | 429 +++--- .../docker/tutorial-quickstart-docker.rst | 487 +++---- doc/source/docker/use-a-different-version.rst | 13 +- ...-pytorch-from-centralized-to-federated.rst | 64 +- ...-pytorch-from-centralized-to-federated.rst | 172 ++- .../explanation-differential-privacy.rst | 182 ++- .../explanation-federated-evaluation.rst | 69 +- .../explanation-flower-architecture.rst | 230 ++-- .../how-to-aggregate-evaluation-results.rst | 23 +- doc/source/how-to-authenticate-supernodes.rst | 95 +- doc/source/how-to-configure-clients.rst | 71 +- doc/source/how-to-configure-logging.rst | 86 +- doc/source/how-to-enable-ssl-connections.rst | 78 +- doc/source/how-to-implement-strategies.rst | 189 ++- doc/source/how-to-install-flower.rst | 75 +- doc/source/how-to-monitor-simulation.rst | 273 ++-- doc/source/how-to-run-simulations.rst | 261 ++-- ...how-to-save-and-load-model-checkpoints.rst | 57 +- doc/source/how-to-upgrade-to-flower-1.0.rst | 90 +- doc/source/how-to-upgrade-to-flower-next.rst | 137 +- doc/source/how-to-use-built-in-mods.rst | 34 +- .../how-to-use-differential-privacy.rst | 185 +-- doc/source/how-to-use-strategies.rst | 50 +- doc/source/index.rst | 229 ++-- doc/source/ref-api-cli.rst | 56 +- doc/source/ref-example-projects.rst | 46 +- doc/source/ref-faq.rst | 3 +- doc/source/tutorial-quickstart-android.rst | 6 +- doc/source/tutorial-quickstart-fastai.rst | 169 ++- .../tutorial-quickstart-huggingface.rst | 674 +++++---- doc/source/tutorial-quickstart-ios.rst | 196 +-- doc/source/tutorial-quickstart-jax.rst | 204 +-- doc/source/tutorial-quickstart-mlx.rst | 669 +++++---- doc/source/tutorial-quickstart-pandas.rst | 6 +- .../tutorial-quickstart-pytorch-lightning.rst | 183 ++- doc/source/tutorial-quickstart-pytorch.rst | 636 +++++---- .../tutorial-quickstart-scikitlearn.rst | 202 +-- doc/source/tutorial-quickstart-tensorflow.rst | 493 ++++--- doc/source/tutorial-quickstart-xgboost.rst | 1215 +++++++++-------- pyproject.toml | 9 + 63 files changed, 5678 insertions(+), 4929 deletions(-) diff --git a/dev/format.sh b/dev/format.sh index e1e2abc307f1..ada5a7f13abc 100755 --- a/dev/format.sh +++ b/dev/format.sh @@ -36,3 +36,6 @@ python -m nbstripout 
examples/*/*.ipynb --extra-keys "$KEYS" # Markdown python -m mdformat --number doc/source examples + +# RST +docstrfmt doc/source diff --git a/dev/test.sh b/dev/test.sh index 58ac0b3d24cd..170d9f4acd1e 100755 --- a/dev/test.sh +++ b/dev/test.sh @@ -56,6 +56,14 @@ echo "- mdformat: done" echo "- All Markdown checks passed" +echo "- Start rST checks" + +echo "- docstrfmt: start" +docstrfmt --check doc/source +echo "- docstrfmt: done" + +echo "- All rST checks passed" + echo "- Start license checks" echo "- copyright: start" diff --git a/doc/source/contributor-explanation-public-and-private-apis.rst b/doc/source/contributor-explanation-public-and-private-apis.rst index 1dfdf88f97d3..ac62ae341f14 100644 --- a/doc/source/contributor-explanation-public-and-private-apis.rst +++ b/doc/source/contributor-explanation-public-and-private-apis.rst @@ -1,22 +1,23 @@ Public and private APIs ======================= -In Python, everything is public. -To enable developers to understand which components can be relied upon, Flower declares a public API. -Components that are part of the public API can be relied upon. -Changes to the public API are announced in the release notes and are subject to deprecation policies. +In Python, everything is public. To enable developers to understand which components can +be relied upon, Flower declares a public API. Components that are part of the public API +can be relied upon. Changes to the public API are announced in the release notes and are +subject to deprecation policies. -Everything that is not part of the public API is part of the private API. -Even though Python allows accessing them, user code should never use those components. -Private APIs can change at any time, even in patch releases. +Everything that is not part of the public API is part of the private API. Even though +Python allows accessing them, user code should never use those components. Private APIs +can change at any time, even in patch releases. How can you determine whether a component is part of the public API or not? Easy: - `Use the Flower API reference documentation `_ - `Use the Flower CLI reference documentation `_ -Everything listed in the reference documentation is part of the public API. -This document explains how Flower maintainers define the public API and how you can determine whether a component is part of the public API or not by reading the Flower source code. +Everything listed in the reference documentation is part of the public API. This +document explains how Flower maintainers define the public API and how you can determine +whether a component is part of the public API or not by reading the Flower source code. Flower public API ----------------- @@ -25,94 +26,117 @@ Flower has a well-defined public API. Let's look at this in more detail. .. important:: - Every component that is reachable by recursively following ``__init__.__all__`` starting from the root package (``flwr``) is part of the public API. + Every component that is reachable by recursively following ``__init__.__all__`` + starting from the root package (``flwr``) is part of the public API. -If you want to determine whether a component (class/function/generator/...) is part of the public API or not, you need to start at the root of the ``flwr`` package. -Let's use ``tree -L 1 -d src/py/flwr`` to look at the Python sub-packages contained ``flwr``: +If you want to determine whether a component (class/function/generator/...) is part of +the public API or not, you need to start at the root of the ``flwr`` package. 
Let's use
+``tree -L 1 -d src/py/flwr`` to look at the Python sub-packages contained in ``flwr``:
 
 .. code-block:: bash
 
-    flwr
-    ├── cli
-    ├── client
-    ├── common
-    ├── proto
-    ├── server
-    └── simulation
+    flwr
+    ├── cli
+    ├── client
+    ├── common
+    ├── proto
+    ├── server
+    └── simulation
 
-Contrast this with the definition of ``__all__`` in the root ``src/py/flwr/__init__.py``:
+Contrast this with the definition of ``__all__`` in the root
+``src/py/flwr/__init__.py``:
 
 .. code-block:: python
 
-    # From `flwr/__init__.py`
-    __all__ = [
-        "client",
-        "common",
-        "server",
-        "simulation",
-    ]
-
-You can see that ``flwr`` has six subpackages (``cli``, ``client``, ``common``, ``proto``, ``server``, ``simulation``), but only four of them are "exported" via ``__all__`` (``client``, ``common``, ``server``, ``simulation``).
-
-What does this mean? It means that ``client``, ``common``, ``server`` and ``simulation`` are part of the public API, but ``cli`` and ``proto`` are not.
-The ``flwr`` subpackages ``cli`` and ``proto`` are private APIs.
-A private API can change completely from one release to the next (even in patch releases).
-It can change in a breaking way, it can be renamed (for example, ``flwr.cli`` could be renamed to ``flwr.command``) and it can even be removed completely.
+    # From `flwr/__init__.py`
+    __all__ = [
+        "client",
+        "common",
+        "server",
+        "simulation",
+    ]
+
+You can see that ``flwr`` has six subpackages (``cli``, ``client``, ``common``,
+``proto``, ``server``, ``simulation``), but only four of them are "exported" via
+``__all__`` (``client``, ``common``, ``server``, ``simulation``).
+
+What does this mean? It means that ``client``, ``common``, ``server`` and ``simulation``
+are part of the public API, but ``cli`` and ``proto`` are not. The ``flwr`` subpackages
+``cli`` and ``proto`` are private APIs. A private API can change completely from one
+release to the next (even in patch releases). It can change in a breaking way, it can be
+renamed (for example, ``flwr.cli`` could be renamed to ``flwr.command``) and it can even
+be removed completely.
 
 Therefore, as a Flower user:
 
 - ``from flwr import client`` ✅ Ok, you're importing a public API.
 - ``from flwr import proto`` ❌ Not recommended, you're importing a private API.
 
-What about components that are nested deeper in the hierarchy? Let's look at Flower strategies to see another typical pattern.
-Flower strategies like ``FedAvg`` are often imported using ``from flwr.server.strategy import FedAvg``.
-Let's look at ``src/py/flwr/server/strategy/__init__.py``:
+What about components that are nested deeper in the hierarchy? Let's look at Flower
+strategies to see another typical pattern. Flower strategies like ``FedAvg`` are often
+imported using ``from flwr.server.strategy import FedAvg``. Let's look at
+``src/py/flwr/server/strategy/__init__.py``:
 
 .. code-block:: python
 
-    from .fedavg import FedAvg as FedAvg
-    # ... more imports
+    from .fedavg import FedAvg as FedAvg
+
+    # ... more imports
 
-    __all__ = [
-        "FedAvg",
-        # ... more exports
-    ]
+    __all__ = [
+        "FedAvg",
+        # ... more exports
+    ]
 
-What's notable here is that all strategies are implemented in dedicated modules (e.g., ``fedavg.py``).
-In ``__init__.py``, we *import* the components we want to make part of the public API and then *export* them via ``__all__``.
-Note that we export the component itself (for example, the ``FedAvg`` class), but not the module it is defined in (for example, ``fedavg.py``).
-This allows us to move the definition of ``FedAvg`` into a different module (or even a module in a subpackage) without breaking the public API (as long as we update the import path in ``__init__.py``). +What's notable here is that all strategies are implemented in dedicated modules (e.g., +``fedavg.py``). In ``__init__.py``, we *import* the components we want to make part of +the public API and then *export* them via ``__all__``. Note that we export the component +itself (for example, the ``FedAvg`` class), but not the module it is defined in (for +example, ``fedavg.py``). This allows us to move the definition of ``FedAvg`` into a +different module (or even a module in a subpackage) without breaking the public API (as +long as we update the import path in ``__init__.py``). Therefore: -- ``from flwr.server.strategy import FedAvg`` ✅ Ok, you're importing a class that is part of the public API. -- ``from flwr.server.strategy import fedavg`` ❌ Not recommended, you're importing a private module. +- ``from flwr.server.strategy import FedAvg`` ✅ Ok, you're importing a class that is + part of the public API. +- ``from flwr.server.strategy import fedavg`` ❌ Not recommended, you're importing a + private module. -This approach is also implemented in the tooling that automatically builds API reference docs. +This approach is also implemented in the tooling that automatically builds API reference +docs. Flower public API of private packages ------------------------------------- -We also use this to define the public API of private subpackages. -Public, in this context, means the API that other ``flwr`` subpackages should use. -For example, ``flwr.server.driver`` is a private subpackage (it's not exported via ``src/py/flwr/server/__init__.py``'s ``__all__``). +We also use this to define the public API of private subpackages. Public, in this +context, means the API that other ``flwr`` subpackages should use. For example, +``flwr.server.driver`` is a private subpackage (it's not exported via +``src/py/flwr/server/__init__.py``'s ``__all__``). -Still, the private sub-package ``flwr.server.driver`` defines a "public" API using ``__all__`` in ``src/py/flwr/server/driver/__init__.py``: +Still, the private sub-package ``flwr.server.driver`` defines a "public" API using +``__all__`` in ``src/py/flwr/server/driver/__init__.py``: .. code-block:: python - from .driver import Driver - from .grpc_driver import GrpcDriver - from .inmemory_driver import InMemoryDriver - - __all__ = [ - "Driver", - "GrpcDriver", - "InMemoryDriver", - ] - -The interesting part is that both ``GrpcDriver`` and ``InMemoryDriver`` are never used by Flower framework users, only by other parts of the Flower framework codebase. -Those other parts of the codebase import, for example, ``InMemoryDriver`` using ``from flwr.server.driver import InMemoryDriver`` (i.e., the ``InMemoryDriver`` exported via ``__all__``), not ``from flwr.server.driver.in_memory_driver import InMemoryDriver`` (``in_memory_driver.py`` is the module containing the actual ``InMemoryDriver`` class definition). - -This is because ``flwr.server.driver`` defines a public interface for other ``flwr`` subpackages. -This allows codeowners of ``flwr.server.driver`` to refactor the package without breaking other ``flwr``-internal users. 
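A quick way to see these export rules in practice is to print a package's ``__all__``
directly from the command line. The following one-liners are a sketch; they assume a
local installation of the Flower version discussed here, and the exact lists may differ
in other releases:

.. code-block:: bash

    $ python -c "import flwr; print(flwr.__all__)"
    ['client', 'common', 'server', 'simulation']
    $ python -c "from flwr.server import driver; print(driver.__all__)"
    ['Driver', 'GrpcDriver', 'InMemoryDriver']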
+ from .driver import Driver + from .grpc_driver import GrpcDriver + from .inmemory_driver import InMemoryDriver + + __all__ = [ + "Driver", + "GrpcDriver", + "InMemoryDriver", + ] + +The interesting part is that both ``GrpcDriver`` and ``InMemoryDriver`` are never used +by Flower framework users, only by other parts of the Flower framework codebase. Those +other parts of the codebase import, for example, ``InMemoryDriver`` using ``from +flwr.server.driver import InMemoryDriver`` (i.e., the ``InMemoryDriver`` exported via +``__all__``), not ``from flwr.server.driver.in_memory_driver import InMemoryDriver`` +(``in_memory_driver.py`` is the module containing the actual ``InMemoryDriver`` class +definition). + +This is because ``flwr.server.driver`` defines a public interface for other ``flwr`` +subpackages. This allows codeowners of ``flwr.server.driver`` to refactor the package +without breaking other ``flwr``-internal users. diff --git a/doc/source/contributor-how-to-build-docker-images.rst b/doc/source/contributor-how-to-build-docker-images.rst index d6acad4afa03..0b3ce243ce50 100644 --- a/doc/source/contributor-how-to-build-docker-images.rst +++ b/doc/source/contributor-how-to-build-docker-images.rst @@ -2,153 +2,161 @@ How to Build Docker Flower Images Locally ========================================= Flower provides pre-made docker images on `Docker Hub `_ -that include all necessary dependencies for running the SuperLink, SuperNode or ServerApp. -You can also build your own custom docker images from scratch with a different version of Python -or Linux distribution (Ubuntu/Alpine) if that is what you need. In this guide, we will explain what -images exist and how to build them locally. +that include all necessary dependencies for running the SuperLink, SuperNode or +ServerApp. You can also build your own custom docker images from scratch with a +different version of Python or Linux distribution (Ubuntu/Alpine) if that is what you +need. In this guide, we will explain what images exist and how to build them locally. -Before we can start, we need to meet a few prerequisites in our local development environment. +Before we can start, we need to meet a few prerequisites in our local development +environment. -#. Clone the ``flower`` repository. +1. Clone the ``flower`` repository. - .. code-block:: bash + .. code-block:: bash - $ git clone --depth=1 https://github.com/adap/flower.git && cd flower + $ git clone --depth=1 https://github.com/adap/flower.git && cd flower -#. Verify the Docker daemon is running. +2. Verify the Docker daemon is running. - The build instructions that assemble the images are located in the respective Dockerfiles. You - can find them in the subdirectories of ``src/docker``. + The build instructions that assemble the images are located in the respective + Dockerfiles. You can find them in the subdirectories of ``src/docker``. - Flower Docker images are configured via build arguments. Through build arguments, we can make the - creation of images more flexible. For example, in the base image, we can specify the version of - Python to install using the ``PYTHON_VERSION`` build argument. Some of the build arguments have - default values, others must be specified when building the image. All available build arguments for - each image are listed in one of the tables below. + Flower Docker images are configured via build arguments. Through build arguments, we + can make the creation of images more flexible. 
For example, in the base image, we can + specify the version of Python to install using the ``PYTHON_VERSION`` build argument. + Some of the build arguments have default values, others must be specified when + building the image. All available build arguments for each image are listed in one of + the tables below. Building the Base Image ----------------------- .. list-table:: - :widths: 25 45 15 15 - :header-rows: 1 - - * - Build argument - - Description - - Required - - Example - * - ``DISTRO`` - - The Linux distribution to use as the base image. - - No - - ``ubuntu`` - * - ``DISTRO_VERSION`` - - Version of the Linux distribution. - - No - - :substitution-code:`|ubuntu_version|` - * - ``PYTHON_VERSION`` - - Version of ``python`` to be installed. - - No - - ``3.11`` or ``3.11.1`` - * - ``PIP_VERSION`` - - Version of ``pip`` to be installed. - - Yes - - :substitution-code:`|pip_version|` - * - ``SETUPTOOLS_VERSION`` - - Version of ``setuptools`` to be installed. - - Yes - - :substitution-code:`|setuptools_version|` - * - ``FLWR_VERSION`` - - Version of Flower to be installed. - - Yes - - :substitution-code:`|stable_flwr_version|` - * - ``FLWR_PACKAGE`` - - The Flower package to be installed. - - No - - ``flwr`` or ``flwr-nightly`` - * - ``FLWR_VERSION_REF`` - - A `direct reference `_ without the ``@`` specifier. If both ``FLWR_VERSION`` and ``FLWR_VERSION_REF`` are specified, the ``FLWR_VERSION_REF`` has precedence. - - No - - `Direct Reference Examples`_ - -The following example creates a base Ubuntu/Alpine image with Python ``3.11.0``, -pip :substitution-code:`|pip_version|`, setuptools :substitution-code:`|setuptools_version|` + :widths: 25 45 15 15 + :header-rows: 1 + + - - Build argument + - Description + - Required + - Example + - - ``DISTRO`` + - The Linux distribution to use as the base image. + - No + - ``ubuntu`` + - - ``DISTRO_VERSION`` + - Version of the Linux distribution. + - No + - :substitution-code:`|ubuntu_version|` + - - ``PYTHON_VERSION`` + - Version of ``python`` to be installed. + - No + - ``3.11`` or ``3.11.1`` + - - ``PIP_VERSION`` + - Version of ``pip`` to be installed. + - Yes + - :substitution-code:`|pip_version|` + - - ``SETUPTOOLS_VERSION`` + - Version of ``setuptools`` to be installed. + - Yes + - :substitution-code:`|setuptools_version|` + - - ``FLWR_VERSION`` + - Version of Flower to be installed. + - Yes + - :substitution-code:`|stable_flwr_version|` + - - ``FLWR_PACKAGE`` + - The Flower package to be installed. + - No + - ``flwr`` or ``flwr-nightly`` + - - ``FLWR_VERSION_REF`` + - A `direct reference + `_ + without the ``@`` specifier. If both ``FLWR_VERSION`` and ``FLWR_VERSION_REF`` + are specified, the ``FLWR_VERSION_REF`` has precedence. + - No + - `Direct Reference Examples`_ + +The following example creates a base Ubuntu/Alpine image with Python ``3.11.0``, pip +:substitution-code:`|pip_version|`, setuptools :substitution-code:`|setuptools_version|` and Flower :substitution-code:`|stable_flwr_version|`: .. code-block:: bash - :substitutions: + :substitutions: - $ cd src/docker/base/ - $ docker build \ - --build-arg PYTHON_VERSION=3.11.0 \ - --build-arg FLWR_VERSION=|stable_flwr_version| \ - --build-arg PIP_VERSION=|pip_version| \ - --build-arg SETUPTOOLS_VERSION=|setuptools_version| \ - -t flwr_base:0.1.0 . 
+ $ cd src/docker/base/ + $ docker build \ + --build-arg PYTHON_VERSION=3.11.0 \ + --build-arg FLWR_VERSION=|stable_flwr_version| \ + --build-arg PIP_VERSION=|pip_version| \ + --build-arg SETUPTOOLS_VERSION=|setuptools_version| \ + -t flwr_base:0.1.0 . -In this example, we specify our image name as ``flwr_base`` and the tag as ``0.1.0``. Remember that the build arguments as well -as the name and tag can be adapted to your needs. These values serve as examples only. +In this example, we specify our image name as ``flwr_base`` and the tag as ``0.1.0``. +Remember that the build arguments as well as the name and tag can be adapted to your +needs. These values serve as examples only. Building a Flower Binary Image ------------------------------ .. list-table:: - :widths: 25 45 15 15 - :header-rows: 1 - - * - Build argument - - Description - - Required - - Example - * - ``BASE_REPOSITORY`` - - The repository name of the base image. - - No - - ``flwr/base`` - * - ``BASE_IMAGE`` - - The Tag of the Flower base image. - - Yes - - :substitution-code:`|stable_flwr_version|-py3.11-ubuntu|ubuntu_version|` - -For example, to build a SuperLink image with the latest Flower version, Python 3.11 and Ubuntu 22.04, run the following: + :widths: 25 45 15 15 + :header-rows: 1 + + - - Build argument + - Description + - Required + - Example + - - ``BASE_REPOSITORY`` + - The repository name of the base image. + - No + - ``flwr/base`` + - - ``BASE_IMAGE`` + - The Tag of the Flower base image. + - Yes + - :substitution-code:`|stable_flwr_version|-py3.11-ubuntu|ubuntu_version|` + +For example, to build a SuperLink image with the latest Flower version, Python 3.11 and +Ubuntu 22.04, run the following: .. code-block:: bash - :substitutions: + :substitutions: - $ cd src/docker/superlink - $ docker build \ - --build-arg BASE_IMAGE=|stable_flwr_version|-py3.11-ubuntu22.04 \ - -t flwr_superlink:0.1.0 . + $ cd src/docker/superlink + $ docker build \ + --build-arg BASE_IMAGE=|stable_flwr_version|-py3.11-ubuntu22.04 \ + -t flwr_superlink:0.1.0 . -If you want to use your own base image instead of the official Flower base image, all you need to do -is set the ``BASE_REPOSITORY`` build argument to ``flwr_base`` (as we've specified above). +If you want to use your own base image instead of the official Flower base image, all +you need to do is set the ``BASE_REPOSITORY`` build argument to ``flwr_base`` (as we've +specified above). .. code-block:: bash - $ cd src/docker/superlink/ - $ docker build \ - --build-arg BASE_REPOSITORY=flwr_base \ - --build-arg BASE_IMAGE=0.1.0 - -t flwr_superlink:0.1.0 . + $ cd src/docker/superlink/ + $ docker build \ + --build-arg BASE_REPOSITORY=flwr_base \ + --build-arg BASE_IMAGE=0.1.0 + -t flwr_superlink:0.1.0 . After creating the image, we can test whether the image is working: .. code-block:: bash - $ docker run --rm flwr_superlink:0.1.0 --help + $ docker run --rm flwr_superlink:0.1.0 --help Direct Reference Examples ------------------------- .. 
code-block:: bash - :substitutions: + :substitutions: - # main branch - git+https://github.com/adap/flower.git@main + # main branch + git+https://github.com/adap/flower.git@main - # commit hash - git+https://github.com/adap/flower.git@1187c707f1894924bfa693d99611cf6f93431835 + # commit hash + git+https://github.com/adap/flower.git@1187c707f1894924bfa693d99611cf6f93431835 - # tag - git+https://github.com/adap/flower.git@|stable_flwr_version| + # tag + git+https://github.com/adap/flower.git@|stable_flwr_version| - # artifact store - https://artifact.flower.ai/py/main/latest/flwr-|stable_flwr_version|-py3-none-any.whl + # artifact store + https://artifact.flower.ai/py/main/latest/flwr-|stable_flwr_version|-py3-none-any.whl diff --git a/doc/source/contributor-how-to-contribute-translations.rst b/doc/source/contributor-how-to-contribute-translations.rst index ba59901cf1c4..5fff62833b0e 100644 --- a/doc/source/contributor-how-to-contribute-translations.rst +++ b/doc/source/contributor-how-to-contribute-translations.rst @@ -2,70 +2,67 @@ Contribute translations ======================= Since `Flower 1.5 -`_ we -have introduced translations to our doc pages, but, as you might have noticed, -the translations are often imperfect. If you speak languages other than -English, you might be able to help us in our effort to make Federated Learning -accessible to as many people as possible by contributing to those translations! -This might also be a great opportunity for those wanting to become open source -contributors with little prerequisites. +`_ we have +introduced translations to our doc pages, but, as you might have noticed, the +translations are often imperfect. If you speak languages other than English, you might +be able to help us in our effort to make Federated Learning accessible to as many people +as possible by contributing to those translations! This might also be a great +opportunity for those wanting to become open source contributors with little +prerequisites. Our translation project is publicly available over on `Weblate -`_, this where most -of the work will happen. +`_, this where most of the +work will happen. Contribute to existing languages -------------------------------- .. youtube:: 10_Xfy5BOfQ - :width: 100% + :width: 100% -The first thing you will need to do in order to contribute is to create a -free Weblate account on this `page -`_. More information -about profile settings can be found `here +The first thing you will need to do in order to contribute is to create a free Weblate +account on this `page `_. More +information about profile settings can be found `here `_. -Once you are signed in to Weblate, you can navigate to the `Flower Framework -project `_. Here, -you should see the different existing languages that can be found on the -website. +Once you are signed in to Weblate, you can navigate to the `Flower Framework project +`_. Here, you should see the +different existing languages that can be found on the website. -Once you have selected the language you want to contribute to, you should see a -similar interface to this: +Once you have selected the language you want to contribute to, you should see a similar +interface to this: - .. image:: _static/weblate_status.png + .. image:: _static/weblate_status.png -The most straight forward option here is to click on the ``Translate`` button -on the top right (in the ``Translation status`` section). This will -automatically bring you to the translation interface for untranslated strings. 
+The most straight forward option here is to click on the ``Translate`` button on the top +right (in the ``Translation status`` section). This will automatically bring you to the +translation interface for untranslated strings. This is what the interface looks like: - .. image:: _static/weblate_interface.png + .. image:: _static/weblate_interface.png -You input your translation in the text box at the top and then, once you are -happy with it, you either press ``Save and continue`` (to save the translation -and go to the next untranslated string), ``Save and stay`` (to save the -translation and stay on the same page), ``Suggest`` (to add your translation to -suggestions for other users to view), or ``Skip`` (to go to the next -untranslated string without saving anything). +You input your translation in the text box at the top and then, once you are happy with +it, you either press ``Save and continue`` (to save the translation and go to the next +untranslated string), ``Save and stay`` (to save the translation and stay on the same +page), ``Suggest`` (to add your translation to suggestions for other users to view), or +``Skip`` (to go to the next untranslated string without saving anything). In order to help with the translations, you can see on the bottom the ``Nearby -strings``, the ``Comments`` (from other contributors), the ``Automatic -suggestions`` (from machine translation engines), the translations in ``Other -languages``, and the ``History`` of translations for this string. +strings``, the ``Comments`` (from other contributors), the ``Automatic suggestions`` +(from machine translation engines), the translations in ``Other languages``, and the +``History`` of translations for this string. -On the right, under the ``String information`` section, you can also click the -link under ``Source string location`` in order to view the source of the doc -file containing the string. +On the right, under the ``String information`` section, you can also click the link +under ``Source string location`` in order to view the source of the doc file containing +the string. -For more information about translating using Weblate, you can check out this -`in-depth guide `_. +For more information about translating using Weblate, you can check out this `in-depth +guide `_. Add new languages ----------------- -If you want to add a new language, you will first have to contact us, either on -`Slack `_, or by opening an issue on our `GitHub -repo `_. +If you want to add a new language, you will first have to contact us, either on `Slack +`_, or by opening an issue on our `GitHub repo +`_. diff --git a/doc/source/contributor-how-to-develop-in-vscode-dev-containers.rst b/doc/source/contributor-how-to-develop-in-vscode-dev-containers.rst index c861457b6edc..79f52f8d8f6f 100644 --- a/doc/source/contributor-how-to-develop-in-vscode-dev-containers.rst +++ b/doc/source/contributor-how-to-develop-in-vscode-dev-containers.rst @@ -1,24 +1,47 @@ Develop in VSCode Dev Containers ================================ -When working on the Flower framework we want to ensure that all contributors use the same developer environment to format code or run tests. For this purpose we are using the VSCode Remote Containers extension. What is it? Read the following quote: - - - The Visual Studio Code Remote - Containers extension lets you use a Docker container as a fully-featured development environment. It allows you to open any folder inside (or mounted into) a container and take advantage of Visual Studio Code's full feature set. 
A :code:`devcontainer.json` file in your project tells VS Code how to access (or create) a development container with a well-defined tool and runtime stack. This container can be used to run an application or to separate tools, libraries, or runtimes needed for working with a codebase.
-
-    Workspace files are mounted from the local file system or copied or cloned into the container. Extensions are installed and run inside the container, where they have full access to the tools, platform, and file system. This means that you can seamlessly switch your entire development environment just by connecting to a different container.
-
-Source: `Official VSCode documentation `_
-
+When working on the Flower framework, we want to ensure that all contributors use the
+same developer environment to format code or run tests. For this purpose we are using
+the VSCode Remote Containers extension. What is it? Read the following quote:
+
+    The Visual Studio Code Remote - Containers extension lets you use a Docker container
+    as a fully-featured development environment. It allows you to open any folder inside
+    (or mounted into) a container and take advantage of Visual Studio Code's full
+    feature set. A ``devcontainer.json`` file in your project tells VS Code how to
+    access (or create) a development container with a well-defined tool and runtime
+    stack. This container can be used to run an application or to separate tools,
+    libraries, or runtimes needed for working with a codebase.
+
+    Workspace files are mounted from the local file system or copied or cloned into the
+    container. Extensions are installed and run inside the container, where they have
+    full access to the tools, platform, and file system. This means that you can
+    seamlessly switch your entire development environment just by connecting to a
+    different container.
+
+Source: `Official VSCode documentation
+`_
 
 Getting started
 ---------------
 
-Configuring and setting up the :code:`Dockerfile` as well the configuration for the devcontainer can be a bit more involved. The good thing is you don't have to do it. Usually it should be enough to install `Docker `_ on your system and ensure its available on your command line. Additionally, install the `VSCode Containers Extension `_.
-
-Now you should be good to go. When starting VSCode, it will ask you to run in the container environment and - if you confirm - automatically build the container and use it. To manually instruct VSCode to use the devcontainer, you can, after installing the extension, click the green area in the bottom left corner of your VSCode window and select the option *(Re)Open Folder in Container*.
-
-In some cases your setup might be more involved. For those cases consult the following sources:
-
-* `Developing inside a Container `_
-* `Remote development in Containers `_
+Configuring and setting up the ``Dockerfile`` as well as the configuration for the
+devcontainer can be a bit more involved. The good thing is you don't have to do it.
+Usually it should be enough to install `Docker
+`_ on your system and ensure it's available on
+your command line. Additionally, install the `VSCode Containers Extension
+`_.
+
+Now you should be good to go. When starting VSCode, it will ask you to run in the
+container environment and - if you confirm - automatically build the container and use
+it. 
To manually instruct VSCode to use the devcontainer, you can, after installing the +extension, click the green area in the bottom left corner of your VSCode window and +select the option *(Re)Open Folder in Container*. + +In some cases your setup might be more involved. For those cases consult the following +sources: + +- `Developing inside a Container + `_ +- `Remote development in Containers + `_ diff --git a/doc/source/contributor-how-to-install-development-versions.rst b/doc/source/contributor-how-to-install-development-versions.rst index 0f0773c85e73..61c123a24309 100644 --- a/doc/source/contributor-how-to-install-development-versions.rst +++ b/doc/source/contributor-how-to-install-development-versions.rst @@ -7,10 +7,13 @@ Install development versions of Flower Using Poetry (recommended) ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Install a ``flwr`` pre-release from PyPI: update the ``flwr`` dependency in ``pyproject.toml`` and then reinstall (don't forget to delete ``poetry.lock`` (``rm poetry.lock``) before running ``poetry install``). +Install a ``flwr`` pre-release from PyPI: update the ``flwr`` dependency in +``pyproject.toml`` and then reinstall (don't forget to delete ``poetry.lock`` (``rm +poetry.lock``) before running ``poetry install``). - ``flwr = { version = "1.0.0a0", allow-prereleases = true }`` (without extras) -- ``flwr = { version = "1.0.0a0", allow-prereleases = true, extras = ["simulation"] }`` (with extras) +- ``flwr = { version = "1.0.0a0", allow-prereleases = true, extras = ["simulation"] }`` + (with extras) Install ``flwr`` from a local copy of the Flower source code via ``pyproject.toml``: @@ -20,9 +23,11 @@ Install ``flwr`` from a local copy of the Flower source code via ``pyproject.tom Install ``flwr`` from a local wheel file via ``pyproject.toml``: - ``flwr = { path = "../../dist/flwr-1.8.0-py3-none-any.whl" }`` (without extras) -- ``flwr = { path = "../../dist/flwr-1.8.0-py3-none-any.whl", extras = ["simulation"] }`` (with extras) +- ``flwr = { path = "../../dist/flwr-1.8.0-py3-none-any.whl", extras = ["simulation"] + }`` (with extras) -Please refer to the Poetry documentation for further details: `Poetry Dependency Specification `_ +Please refer to the Poetry documentation for further details: `Poetry Dependency +Specification `_ Using pip (recommended on Colab) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -32,18 +37,21 @@ Install a ``flwr`` pre-release from PyPI: - ``pip install -U --pre flwr`` (without extras) - ``pip install -U --pre 'flwr[simulation]'`` (with extras) -Python packages can be installed from git repositories. Use one of the following commands to install the Flower directly from GitHub. +Python packages can be installed from git repositories. Use one of the following +commands to install the Flower directly from GitHub. 
Install ``flwr`` from the default GitHub branch (``main``): - ``pip install flwr@git+https://github.com/adap/flower.git`` (without extras) -- ``pip install 'flwr[simulation]@git+https://github.com/adap/flower.git'`` (with extras) +- ``pip install 'flwr[simulation]@git+https://github.com/adap/flower.git'`` (with + extras) Install ``flwr`` from a specific GitHub branch (``branch-name``): -- ``pip install flwr@git+https://github.com/adap/flower.git@branch-name`` (without extras) -- ``pip install 'flwr[simulation]@git+https://github.com/adap/flower.git@branch-name'`` (with extras) - +- ``pip install flwr@git+https://github.com/adap/flower.git@branch-name`` (without + extras) +- ``pip install 'flwr[simulation]@git+https://github.com/adap/flower.git@branch-name'`` + (with extras) Open Jupyter Notebooks on Google Colab -------------------------------------- @@ -52,12 +60,15 @@ Open the notebook ``doc/source/tutorial-series-get-started-with-flower-pytorch.i - https://colab.research.google.com/github/adap/flower/blob/main/doc/source/tutorial-series-get-started-with-flower-pytorch.ipynb -Open a development version of the same notebook from branch `branch-name` by changing ``main`` to ``branch-name`` (right after ``blob``): +Open a development version of the same notebook from branch `branch-name` by changing +``main`` to ``branch-name`` (right after ``blob``): - https://colab.research.google.com/github/adap/flower/blob/branch-name/doc/source/tutorial-series-get-started-with-flower-pytorch.ipynb Install a `whl` on Google Colab: -1. In the vertical icon grid on the left hand side, select ``Files`` > ``Upload to session storage`` +1. In the vertical icon grid on the left hand side, select ``Files`` > ``Upload to + session storage`` 2. Upload the whl (e.g., ``flwr-1.8.0-py3-none-any.whl``) -3. Change ``!pip install -q 'flwr[simulation]' torch torchvision matplotlib`` to ``!pip install -q 'flwr-1.8.0-py3-none-any.whl[simulation]' torch torchvision matplotlib`` +3. Change ``!pip install -q 'flwr[simulation]' torch torchvision matplotlib`` to ``!pip + install -q 'flwr-1.8.0-py3-none-any.whl[simulation]' torch torchvision matplotlib`` diff --git a/doc/source/contributor-how-to-release-flower.rst b/doc/source/contributor-how-to-release-flower.rst index 4853d87bc4c1..fafc02cab64c 100644 --- a/doc/source/contributor-how-to-release-flower.rst +++ b/doc/source/contributor-how-to-release-flower.rst @@ -1,16 +1,28 @@ Release Flower ============== -This document describes the current release process. It may or may not change in the future. +This document describes the current release process. It may or may not change in the +future. During the release ------------------ -The version number of a release is stated in ``pyproject.toml``. To release a new version of Flower, the following things need to happen (in that order): - -1. Run ``python3 src/py/flwr_tool/update_changelog.py `` in order to add every new change to the changelog (feel free to make manual changes to the changelog afterwards until it looks good). -2. Once the changelog has been updated with all the changes, run ``./dev/prepare-release-changelog.sh v``, where ```` is the version stated in ``pyproject.toml`` (notice the ``v`` added before it). This will replace the ``Unreleased`` header of the changelog by the version and current date, and it will add a thanking message for the contributors. Open a pull request with those changes. -3. 
Once the pull request is merged, tag the release commit with the version number as soon as the PR is merged: ``git tag v`` (notice the ``v`` added before the version number), then ``git push --tags``. This will create a draft release on GitHub containing the correct artifacts and the relevant part of the changelog.
+The version number of a release is stated in ``pyproject.toml``. To release a new
+version of Flower, the following things need to happen (in that order):
+
+1. Run ``python3 src/py/flwr_tool/update_changelog.py `` in order to add
+   every new change to the changelog (feel free to make manual changes to the changelog
+   afterwards until it looks good).
+2. Once the changelog has been updated with all the changes, run
+   ``./dev/prepare-release-changelog.sh v``, where ```` is the
+   version stated in ``pyproject.toml`` (notice the ``v`` added before it). This will
+   replace the ``Unreleased`` header of the changelog with the version and current
+   date, and it will add a thank-you message for the contributors. Open a pull request
+   with those changes.
+3. Once the pull request is merged, tag the release commit with the version number:
+   ``git tag v`` (notice the ``v`` added before the version number), then
+   ``git push --tags``. This will create a draft release on GitHub containing the
+   correct artifacts and the relevant part of the changelog.
 4. Check the draft release on GitHub, and if everything is good, publish it.
 
 After the release
@@ -22,7 +34,8 @@ Create a pull request which contains the following changes:
 2. Update all files which contain the current version number if necessary.
 3. Add a new ``Unreleased`` section in ``changelog.md``.
 
-Merge the pull request on the same day (i.e., before a new nightly release gets published to PyPI).
+Merge the pull request on the same day (i.e., before a new nightly release gets
+published to PyPI).
 
 Publishing a pre-release
 ------------------------
@@ -30,7 +43,8 @@ Publishing a pre-release
 Pre-release naming
 ~~~~~~~~~~~~~~~~~~
 
-PyPI supports pre-releases (alpha, beta, release candidate). Pre-releases MUST use one of the following naming patterns:
+PyPI supports pre-releases (alpha, beta, release candidate). Pre-releases MUST use one
+of the following naming patterns:
 
 - Alpha: ``MAJOR.MINOR.PATCHaN``
 - Beta: ``MAJOR.MINOR.PATCHbN``
 - Release candidate (RC): ``MAJOR.MINOR.PATCHrcN``
 
 Examples include:
 
 - ``1.0.0rc0``
 - ``1.0.0rc1``
 
-This is in line with PEP-440 and the recommendations from the Python Packaging
-Authority (PyPA):
+This is in line with PEP-440 and the recommendations from the Python Packaging Authority
+(PyPA):
 
 - `PEP-440 `_
-- `PyPA Choosing a versioning scheme `_
+- `PyPA Choosing a versioning scheme
+  `_
 
-Note that the approach defined by PyPA is not compatible with SemVer 2.0.0 spec, for details consult the `Semantic Versioning Specification `_ (specifically item 11 on precedence).
+Note that the approach defined by PyPA is not compatible with the SemVer 2.0.0 spec; for
+details, consult the `Semantic Versioning Specification
+`_ (specifically item 11 on
+precedence).
 
 Pre-release classification
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Should the next pre-release be called alpha, beta, or release candidate? 
-- RC: feature complete, no known issues (apart from issues that are classified as "won't fix" for the next stable release) - if no issues surface this will become the next stable release +- RC: feature complete, no known issues (apart from issues that are classified as "won't + fix" for the next stable release) - if no issues surface this will become the next + stable release - Beta: feature complete, allowed to have known issues - Alpha: not feature complete, allowed to have known issues diff --git a/doc/source/contributor-how-to-set-up-a-virtual-env.rst b/doc/source/contributor-how-to-set-up-a-virtual-env.rst index a844298fdca9..7e54ed64c9c9 100644 --- a/doc/source/contributor-how-to-set-up-a-virtual-env.rst +++ b/doc/source/contributor-how-to-set-up-a-virtual-env.rst @@ -1,26 +1,33 @@ Set up a virtual env ==================== -It is recommended to run your Python setup within a virtual environment. -This guide shows three different examples how to create a virtual environment with pyenv virtualenv, poetry, or Anaconda. -You can follow the instructions or choose your preferred setup. +It is recommended to run your Python setup within a virtual environment. This guide +shows three different examples how to create a virtual environment with pyenv +virtualenv, poetry, or Anaconda. You can follow the instructions or choose your +preferred setup. Python Version -------------- -Flower requires at least `Python 3.9 `_, but `Python 3.10 `_ or above is recommended. +Flower requires at least `Python 3.9 `_, but `Python 3.10 +`_ or above is recommended. .. note:: - Due to a known incompatibility with `ray `_, - we currently recommend utilizing at most `Python 3.11 `_ for - running Flower simulations. + + Due to a known incompatibility with `ray `_, we + currently recommend utilizing at most `Python 3.11 `_ + for running Flower simulations. Virtualenv with Pyenv/Virtualenv -------------------------------- -One of the recommended virtual environment is `pyenv `_/`virtualenv `_. Please see `Flower examples `_ for details. +One of the recommended virtual environment is `pyenv +`_/`virtualenv +`_. Please see `Flower examples +`_ for details. -Once Pyenv is set up, you can use it to install `Python Version 3.10 `_ or above: +Once Pyenv is set up, you can use it to install `Python Version 3.10 +`_ or above: .. code-block:: shell @@ -32,34 +39,35 @@ Create the virtualenv with: pyenv virtualenv 3.10.12 flower-3.10.12 - Activate the virtualenv by running the following command: .. code-block:: shell echo flower-3.10.12 > .python-version - Virtualenv with Poetry ---------------------- -The Flower examples are based on `Poetry `_ to manage dependencies. After installing Poetry you simply create a virtual environment with: +The Flower examples are based on `Poetry `_ to manage +dependencies. After installing Poetry you simply create a virtual environment with: .. code-block:: shell poetry shell -If you open a new terminal you can activate the previously created virtual environment with the following command: +If you open a new terminal you can activate the previously created virtual environment +with the following command: .. code-block:: shell source $(poetry env info --path)/bin/activate - Virtualenv with Anaconda ------------------------ -If you prefer to use Anaconda for your virtual environment then install and setup the `conda `_ package. 
After setting it up you can create a virtual environment with: +If you prefer to use Anaconda for your virtual environment then install and setup the +`conda `_ +package. After setting it up you can create a virtual environment with: .. code-block:: shell @@ -71,8 +79,8 @@ and activate the virtual environment with: conda activate flower-3.10.12 - And then? --------- -As soon as you created your virtual environment you clone one of the `Flower examples `_. +As soon as you created your virtual environment you clone one of the `Flower examples +`_. diff --git a/doc/source/contributor-how-to-write-documentation.rst b/doc/source/contributor-how-to-write-documentation.rst index fcd8c5bb18c6..6209530b71e0 100644 --- a/doc/source/contributor-how-to-write-documentation.rst +++ b/doc/source/contributor-how-to-write-documentation.rst @@ -1,14 +1,15 @@ Write documentation =================== - Project layout -------------- -The Flower documentation lives in the ``doc`` directory. The Sphinx-based documentation system supports both reStructuredText (``.rst`` files) and Markdown (``.md`` files). - -Note that, in order to build the documentation locally (with ``poetry run make html``, like described below), `Pandoc `_ needs to be installed on the system. +The Flower documentation lives in the ``doc`` directory. The Sphinx-based documentation +system supports both reStructuredText (``.rst`` files) and Markdown (``.md`` files). +Note that, in order to build the documentation locally (with ``poetry run make html``, +like described below), `Pandoc `_ needs to be +installed on the system. Edit an existing page --------------------- @@ -17,7 +18,6 @@ Edit an existing page 2. Compile the docs: ``cd doc``, then ``poetry run make html`` 3. Open ``doc/build/html/index.html`` in the browser to check the result - Create a new page ----------------- diff --git a/doc/source/contributor-ref-good-first-contributions.rst b/doc/source/contributor-ref-good-first-contributions.rst index 2b8ce88413f5..a715e006f905 100644 --- a/doc/source/contributor-ref-good-first-contributions.rst +++ b/doc/source/contributor-ref-good-first-contributions.rst @@ -1,41 +1,41 @@ Good first contributions ======================== -We welcome contributions to Flower! However, it is not always easy to know -where to start. We therefore put together a few recommendations on where to -start to increase your chances of getting your PR accepted into the Flower -codebase. - +We welcome contributions to Flower! However, it is not always easy to know where to +start. We therefore put together a few recommendations on where to start to increase +your chances of getting your PR accepted into the Flower codebase. Where to start -------------- -Until the Flower core library matures it will be easier to get PR's accepted if -they only touch non-core areas of the codebase. Good candidates to get started -are: +Until the Flower core library matures it will be easier to get PR's accepted if they +only touch non-core areas of the codebase. Good candidates to get started are: - Documentation: What's missing? What could be expressed more clearly? - Baselines: See below. - Examples: See below. - Request for Flower Baselines ---------------------------- -If you are not familiar with Flower Baselines, you should probably check-out our `contributing guide for baselines `_. +If you are not familiar with Flower Baselines, you should probably check-out our +`contributing guide for baselines +`_. -You should then check out the open -`issues `_ for baseline requests. 
-If you find a baseline that you'd like to work on and that has no assignees, feel free to assign it to yourself and start working on it! +You should then check out the open `issues +`_ +for baseline requests. If you find a baseline that you'd like to work on and that has no +assignees, feel free to assign it to yourself and start working on it! -Otherwise, if you don't find a baseline you'd like to work on, be sure to open a new issue with the baseline request template! +Otherwise, if you don't find a baseline you'd like to work on, be sure to open a new +issue with the baseline request template! Request for examples -------------------- -We wish we had more time to write usage examples because we believe they help -users to get started with building what they want to build. Here are a few -ideas where we'd be happy to accept a PR: +We wish we had more time to write usage examples because we believe they help users to +get started with building what they want to build. Here are a few ideas where we'd be +happy to accept a PR: - Llama 2 fine-tuning, with Hugging Face Transformers and PyTorch - XGBoost diff --git a/doc/source/contributor-ref-secure-aggregation-protocols.rst b/doc/source/contributor-ref-secure-aggregation-protocols.rst index 7107d04b8cd0..347cb2724424 100644 --- a/doc/source/contributor-ref-secure-aggregation-protocols.rst +++ b/doc/source/contributor-ref-secure-aggregation-protocols.rst @@ -1,13 +1,16 @@ Secure Aggregation Protocols ============================ -Include SecAgg, SecAgg+, and LightSecAgg protocol. The LightSecAgg protocol has not been implemented yet, so its diagram and abstraction may not be accurate in practice. -The SecAgg protocol can be considered as a special case of the SecAgg+ protocol. +Include SecAgg, SecAgg+, and LightSecAgg protocol. The LightSecAgg protocol has not been +implemented yet, so its diagram and abstraction may not be accurate in practice. The +SecAgg protocol can be considered as a special case of the SecAgg+ protocol. -The :code:`SecAgg+` abstraction -------------------------------- +The ``SecAgg+`` abstraction +--------------------------- -In this implementation, each client will be assigned with a unique index (int) for secure aggregation, and thus many python dictionaries used have keys of int type rather than ClientProxy type. +In this implementation, each client will be assigned with a unique index (int) for +secure aggregation, and thus many python dictionaries used have keys of int type rather +than ClientProxy type. .. code-block:: python @@ -15,9 +18,7 @@ In this implementation, each client will be assigned with a unique index (int) f """Abstract base class for the SecAgg+ protocol implementations.""" @abstractmethod - def generate_graph( - self, clients: List[ClientProxy], k: int - ) -> ClientGraph: + def generate_graph(self, clients: List[ClientProxy], k: int) -> ClientGraph: """Build a k-degree undirected graph of clients. Each client will only generate pair-wise masks with its k neighbours. k is equal to the number of clients in SecAgg, i.e., a complete graph. @@ -31,16 +32,16 @@ In this implementation, each client will be assigned with a unique index (int) f @abstractmethod def ask_keys( - self, - clients: List[ClientProxy], ask_keys_ins_list: List[AskKeysIns] + self, clients: List[ClientProxy], ask_keys_ins_list: List[AskKeysIns] ) -> AskKeysResultsAndFailures: """Ask public keys. 
(AskKeysIns is an empty class, and hence ask_keys_ins_list can be omitted.)""" @abstractmethod def share_keys( self, - clients: List[ClientProxy], public_keys_dict: Dict[int, AskKeysRes], - graph: ClientGraph + clients: List[ClientProxy], + public_keys_dict: Dict[int, AskKeysRes], + graph: ClientGraph, ) -> ShareKeysResultsAndFailures: """Send public keys.""" @@ -48,17 +49,18 @@ In this implementation, each client will be assigned with a unique index (int) f def ask_vectors( clients: List[ClientProxy], forward_packet_list_dict: Dict[int, List[ShareKeysPacket]], - client_instructions=None: Dict[int, FitIns] + client_instructions: Dict[int, FitIns] = None, ) -> AskVectorsResultsAndFailures: """Ask vectors of local model parameters. (If client_instructions is not None, local models will be trained in the ask vectors stage, - rather than trained parallelly as the protocol goes through the previous stages.)""" + rather than trained parallelly as the protocol goes through the previous stages.) + """ @abstractmethod def unmask_vectors( clients: List[ClientProxy], dropout_clients: List[ClientProxy], - graph: ClientGraph + graph: ClientGraph, ) -> UnmaskVectorsResultsAndFailures: """Unmask and compute the aggregated model. UnmaskVectorRes contains shares of keys needed to generate masks.""" @@ -155,10 +157,12 @@ The Flower server will execute and process received results in the following ord deactivate P end -The :code:`LightSecAgg` abstraction ------------------------------------ +The ``LightSecAgg`` abstraction +------------------------------- -In this implementation, each client will be assigned with a unique index (int) for secure aggregation, and thus many python dictionaries used have keys of int type rather than ClientProxy type. +In this implementation, each client will be assigned with a unique index (int) for +secure aggregation, and thus many python dictionaries used have keys of int type rather +than ClientProxy type. .. code-block:: python @@ -174,7 +178,8 @@ In this implementation, each client will be assigned with a unique index (int) f @abstractmethod def ask_encrypted_encoded_masks( self, - clients: List[ClientProxy], public_keys_dict: Dict[int, LightSecAggSetupConfigRes] + clients: List[ClientProxy], + public_keys_dict: Dict[int, LightSecAggSetupConfigRes], ) -> AskEncryptedEncodedMasksResultsAndFailures: """Ask encrypted encoded masks. The protocol adopts Diffie-Hellman keys to build pair-wise secured channels to transfer encoded mask.""" @@ -183,15 +188,16 @@ In this implementation, each client will be assigned with a unique index (int) f self, clients: List[ClientProxy], forward_packet_list_dict: Dict[int, List[EncryptedEncodedMasksPacket]], - client_instructions=None: Dict[int, FitIns] + client_instructions: Dict[int, FitIns] = None, ) -> AskMaskedModelsResultsAndFailures: """Ask the masked local models. (If client_instructions is not None, local models will be trained in the ask vectors stage, - rather than trained parallelly as the protocol goes through the previous stages.)""" + rather than trained parallelly as the protocol goes through the previous stages.) + """ @abstractmethod def ask_aggregated_encoded_masks( - clients: List[ClientProxy] + clients: List[ClientProxy], ) -> AskAggregatedEncodedMasksResultsAndFailures: """Ask aggregated encoded masks""" @@ -272,158 +278,157 @@ Types .. 
code-block:: python - # the SecAgg+ protocol + # the SecAgg+ protocol + + ClientGraph = Dict[int, List[int]] - ClientGraph = Dict[int, List[int]] + SetupConfigResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, SetupConfigRes]], List[BaseException] + ] - SetupConfigResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, SetupConfigRes]], List[BaseException] - ] + AskKeysResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, AskKeysRes]], List[BaseException] + ] - AskKeysResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, AskKeysRes]], List[BaseException] - ] + ShareKeysResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, ShareKeysRes]], List[BaseException] + ] - ShareKeysResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, ShareKeysRes]], List[BaseException] - ] + AskVectorsResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, AskVectorsRes]], List[BaseException] + ] - AskVectorsResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, AskVectorsRes]], List[BaseException] - ] + UnmaskVectorsResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, UnmaskVectorsRes]], List[BaseException] + ] - UnmaskVectorsResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, UnmaskVectorsRes]], List[BaseException] - ] + FitResultsAndFailures = Tuple[List[Tuple[ClientProxy, FitRes]], List[BaseException]] - FitResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, FitRes]], List[BaseException] - ] + @dataclass + class SetupConfigIns: + sec_agg_cfg_dict: Dict[str, Scalar] - @dataclass - class SetupConfigIns: - sec_agg_cfg_dict: Dict[str, Scalar] + @dataclass + class SetupConfigRes: + pass - @dataclass - class SetupConfigRes: - pass + @dataclass + class AskKeysIns: + pass - @dataclass - class AskKeysIns: - pass + @dataclass + class AskKeysRes: + """Ask Keys Stage Response from client to server""" - @dataclass - class AskKeysRes: - """Ask Keys Stage Response from client to server""" - pk1: bytes - pk2: bytes + pk1: bytes + pk2: bytes - @dataclass - class ShareKeysIns: - public_keys_dict: Dict[int, AskKeysRes] + @dataclass + class ShareKeysIns: + public_keys_dict: Dict[int, AskKeysRes] - @dataclass - class ShareKeysPacket: - source: int - destination: int - ciphertext: bytes + @dataclass + class ShareKeysPacket: + source: int + destination: int + ciphertext: bytes - @dataclass - class ShareKeysRes: - share_keys_res_list: List[ShareKeysPacket] + @dataclass + class ShareKeysRes: + share_keys_res_list: List[ShareKeysPacket] - @dataclass - class AskVectorsIns: - ask_vectors_in_list: List[ShareKeysPacket] - fit_ins: FitIns + @dataclass + class AskVectorsIns: + ask_vectors_in_list: List[ShareKeysPacket] + fit_ins: FitIns - @dataclass - class AskVectorsRes: - parameters: Parameters + @dataclass + class AskVectorsRes: + parameters: Parameters - @dataclass - class UnmaskVectorsIns: - available_clients: List[int] - dropout_clients: List[int] + @dataclass + class UnmaskVectorsIns: + available_clients: List[int] + dropout_clients: List[int] - @dataclass - class UnmaskVectorsRes: - share_dict: Dict[int, bytes] + @dataclass + class UnmaskVectorsRes: + share_dict: Dict[int, bytes] - # the LightSecAgg protocol + # the LightSecAgg protocol - LightSecAggSetupConfigResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, LightSecAggSetupConfigRes]], List[BaseException] - ] + LightSecAggSetupConfigResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, LightSecAggSetupConfigRes]], List[BaseException] + ] - AskEncryptedEncodedMasksResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, AskEncryptedEncodedMasksRes]], List[BaseException] - ] + 
AskEncryptedEncodedMasksResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, AskEncryptedEncodedMasksRes]], List[BaseException] + ] - AskMaskedModelsResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, AskMaskedModelsRes]], List[BaseException] - ] + AskMaskedModelsResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, AskMaskedModelsRes]], List[BaseException] + ] - AskAggregatedEncodedMasksResultsAndFailures = Tuple[ - List[Tuple[ClientProxy, AskAggregatedEncodedMasksRes]], List[BaseException] - ] + AskAggregatedEncodedMasksResultsAndFailures = Tuple[ + List[Tuple[ClientProxy, AskAggregatedEncodedMasksRes]], List[BaseException] + ] - @dataclass - class LightSecAggSetupConfigIns: - sec_agg_cfg_dict: Dict[str, Scalar] + @dataclass + class LightSecAggSetupConfigIns: + sec_agg_cfg_dict: Dict[str, Scalar] - @dataclass - class LightSecAggSetupConfigRes: - pk: bytes + @dataclass + class LightSecAggSetupConfigRes: + pk: bytes - @dataclass - class AskEncryptedEncodedMasksIns: - public_keys_dict: Dict[int, LightSecAggSetupConfigRes] + @dataclass + class AskEncryptedEncodedMasksIns: + public_keys_dict: Dict[int, LightSecAggSetupConfigRes] - @dataclass - class EncryptedEncodedMasksPacket: - source: int - destination: int - ciphertext: bytes + @dataclass + class EncryptedEncodedMasksPacket: + source: int + destination: int + ciphertext: bytes - @dataclass - class AskEncryptedEncodedMasksRes: - packet_list: List[EncryptedEncodedMasksPacket] + @dataclass + class AskEncryptedEncodedMasksRes: + packet_list: List[EncryptedEncodedMasksPacket] - @dataclass - class AskMaskedModelsIns: - packet_list: List[EncryptedEncodedMasksPacket] - fit_ins: FitIns + @dataclass + class AskMaskedModelsIns: + packet_list: List[EncryptedEncodedMasksPacket] + fit_ins: FitIns - @dataclass - class AskMaskedModelsRes: - parameters: Parameters + @dataclass + class AskMaskedModelsRes: + parameters: Parameters - @dataclass - class AskAggregatedEncodedMasksIns: - surviving_clients: List[int] + @dataclass + class AskAggregatedEncodedMasksIns: + surviving_clients: List[int] - @dataclass - class AskAggregatedEncodedMasksRes: - aggregated_encoded_mask: Parameters + @dataclass + class AskAggregatedEncodedMasksRes: + aggregated_encoded_mask: Parameters diff --git a/doc/source/contributor-tutorial-contribute-on-github.rst b/doc/source/contributor-tutorial-contribute-on-github.rst index 6970e7e8a580..22c6c6ef86b0 100644 --- a/doc/source/contributor-tutorial-contribute-on-github.rst +++ b/doc/source/contributor-tutorial-contribute-on-github.rst @@ -1,100 +1,113 @@ Contribute on GitHub ==================== -This guide is for people who want to get involved with Flower, but who are not used to contributing to GitHub projects. - -If you're familiar with how contributing on GitHub works, you can directly check out our :doc:`getting started guide for contributors <contributor-tutorial-get-started-as-a-contributor>`. +This guide is for people who want to get involved with Flower, but who are not used to +contributing to GitHub projects. +If you're familiar with how contributing on GitHub works, you can directly check out our +:doc:`getting started guide for contributors +<contributor-tutorial-get-started-as-a-contributor>`. Setting up the repository ------------------------- 1. **Create a GitHub account and setup Git** - Git is a distributed version control tool. This allows for an entire codebase's history to be stored and every developer's machine. - It is a software that will need to be installed on your local machine, you can follow this `guide `_ to set it up. - - GitHub, itself, is a code hosting platform for version control and collaboration.
It allows for everyone to collaborate and work from anywhere on remote repositories. - - If you haven't already, you will need to create an account on `GitHub `_. - - The idea behind the generic Git and GitHub workflow boils down to this: - you download code from a remote repository on GitHub, make changes locally and keep track of them using Git and then you upload your new history back to GitHub. - + Git is a distributed version control tool. This allows for an entire codebase's + history to be stored on every developer's machine. It is software that will + need to be installed on your local machine; you can follow this `guide + `_ to + set it up. + + GitHub, itself, is a code hosting platform for version control and collaboration. + It allows for everyone to collaborate and work from anywhere on remote + repositories. + + If you haven't already, you will need to create an account on `GitHub + `_. + + The idea behind the generic Git and GitHub workflow boils down to this: you + download code from a remote repository on GitHub, make changes locally and keep + track of them using Git and then you upload your new history back to GitHub. 2. **Forking the Flower repository** - A fork is a personal copy of a GitHub repository. To create one for Flower, you must navigate to ``_ (while connected to your GitHub account) - and click the ``Fork`` button situated on the top right of the page. - - .. image:: _static/fork_button.png + A fork is a personal copy of a GitHub repository. To create one for Flower, you + must navigate to https://github.com/adap/flower (while connected to your GitHub + account) and click the ``Fork`` button situated on the top right of the page. - You can change the name if you want, but this is not necessary as this version of Flower will be yours and will sit inside your own account (i.e., in your own list of repositories). - Once created, you should see on the top left corner that you are looking at your own version of Flower. + .. image:: _static/fork_button.png - .. image:: _static/fork_link.png + You can change the name if you want, but this is not necessary as this version of + Flower will be yours and will sit inside your own account (i.e., in your own list + of repositories). Once created, you should see on the top left corner that you + are looking at your own version of Flower. + .. image:: _static/fork_link.png 3. **Cloning your forked repository** - The next step is to download the forked repository on your machine to be able to make changes to it. - On your forked repository page, you should first click on the ``Code`` button on the right, - this will give you the ability to copy the HTTPS link of the repository. + The next step is to download the forked repository on your machine to be able to + make changes to it. On your forked repository page, you should first click on the + ``Code`` button on the right, this will give you the ability to copy the HTTPS + link of the repository. - .. image:: _static/cloning_fork.png + .. image:: _static/cloning_fork.png - Once you copied the \<URL\>, you can open a terminal on your machine, navigate to the place you want to download the repository to and type: + Once you copied the \<URL\>, you can open a terminal on your machine, navigate to + the place you want to download the repository to and type: - .. code-block:: shell + .. code-block:: shell - $ git clone <URL> - - This will create a ``flower/`` (or the name of your fork if you renamed it) folder in the current working directory.
+ $ git clone <URL> + This will create a ``flower/`` (or the name of your fork if you renamed it) + folder in the current working directory. 4. **Add origin** - You can then go into the repository folder: - - .. code-block:: shell - - $ cd flower + You can then go into the repository folder: - And here we will need to add an origin to our repository. The origin is the \<URL\> of the remote fork repository. - To obtain it, we can do as previously mentioned by going to our fork repository on our GitHub account and copying the link. + .. code-block:: shell - .. image:: _static/cloning_fork.png + $ cd flower - Once the \<URL\> is copied, we can type the following command in our terminal: + And here we will need to add an origin to our repository. The origin is the + \<URL\> of the remote fork repository. To obtain it, we can do as previously + mentioned by going to our fork repository on our GitHub account and copying the + link. - .. code-block:: shell + .. image:: _static/cloning_fork.png - $ git remote add origin <URL> + Once the \<URL\> is copied, we can type the following command in our terminal: + .. code-block:: shell + $ git remote add origin <URL> 5. **Add upstream** - Now we will add an upstream address to our repository. - Still in the same directory, we must run the following command: + Now we will add an upstream address to our repository. Still in the same + directory, we must run the following command: - .. code-block:: shell + .. code-block:: shell - $ git remote add upstream https://github.com/adap/flower.git + $ git remote add upstream https://github.com/adap/flower.git - The following diagram visually explains what we did in the previous steps: + The following diagram visually explains what we did in the previous steps: - .. image:: _static/github_schema.png + .. image:: _static/github_schema.png - The upstream is the GitHub remote address of the parent repository (in this case Flower), - i.e. the one we eventually want to contribute to and therefore need an up-to-date history of. - The origin is just the GitHub remote address of the forked repository we created, i.e. the copy (fork) in our own account. + The upstream is the GitHub remote address of the parent repository (in this case + Flower), i.e. the one we eventually want to contribute to and therefore need an + up-to-date history of. The origin is just the GitHub remote address of the forked + repository we created, i.e. the copy (fork) in our own account. - To make sure our local version of the fork is up-to-date with the latest changes from the Flower repository, - we can execute the following command: + To make sure our local version of the fork is up-to-date with the latest changes + from the Flower repository, we can execute the following command: - .. code-block:: shell - - $ git pull upstream main + .. code-block:: shell + $ git pull upstream main Setting up the coding environment --------------------------------- -This can be achieved by following this :doc:`getting started guide for contributors <contributor-tutorial-get-started-as-a-contributor>` (note that you won't need to clone the repository). -Once you are able to write code and test it, you can finally start making changes! + +This can be achieved by following this :doc:`getting started guide for contributors +<contributor-tutorial-get-started-as-a-contributor>` (note that you won't need to clone +the repository). Once you are able to write code and test it, you can finally start +making changes! Making changes -------------- @@ -112,211 +125,233 @@ And with Flower's repository: $ git pull upstream main 1.
**Create a new branch** - To make the history cleaner and easier to work with, it is good practice to - create a new branch for each feature/project that needs to be implemented. - - To do so, just run the following command inside the repository's directory: + To make the history cleaner and easier to work with, it is good practice to + create a new branch for each feature/project that needs to be implemented. - .. code-block:: shell + To do so, just run the following command inside the repository's directory: - $ git switch -c <branch_name> + .. code-block:: shell + $ git switch -c <branch_name> 2. **Make changes** - Write great code and create wonderful changes using your favorite editor! - + Write great code and create wonderful changes using your favorite editor! 3. **Test and format your code** - Don't forget to test and format your code! Otherwise your code won't be able to be merged into the Flower repository. - This is done so the codebase stays consistent and easy to understand. - - To do so, we have written a few scripts that you can execute: + Don't forget to test and format your code! Otherwise your code cannot + be merged into the Flower repository. This is done so the codebase stays + consistent and easy to understand. - .. code-block:: shell + To do so, we have written a few scripts that you can execute: - $ ./dev/format.sh # to format your code - $ ./dev/test.sh # to test that your code can be accepted - $ ./baselines/dev/format.sh # same as above but for code added to baselines - $ ./baselines/dev/test.sh # same as above but for code added to baselines + .. code-block:: shell + $ ./dev/format.sh # to format your code + $ ./dev/test.sh # to test that your code can be accepted + $ ./baselines/dev/format.sh # same as above but for code added to baselines + $ ./baselines/dev/test.sh # same as above but for code added to baselines 4. **Stage changes** - Before creating a commit that will update your history, you must specify to Git which files it needs to take into account. - - This can be done with: + Before creating a commit that will update your history, you must specify to Git + which files it needs to take into account. - .. code-block:: shell + This can be done with: - $ git add <path_of_file> + .. code-block:: shell - To check which files have been modified compared to the last version (last commit) and to see which files are staged for commit, - you can use the :code:`git status` command. + $ git add <path_of_file> + To check which files have been modified compared to the last version (last + commit) and to see which files are staged for commit, you can use the ``git + status`` command. 5. **Commit changes** - Once you have added all the files you wanted to commit using :code:`git add`, you can finally create your commit using this command: + Once you have added all the files you wanted to commit using ``git add``, you can + finally create your commit using this command: - .. code-block:: shell + .. code-block:: shell - $ git commit -m "<commit_message>" - - The \<commit_message\> is there to explain to others what the commit does. It should be written in an imperative style and be concise. - An example would be :code:`git commit -m "Add images to README"`. + $ git commit -m "<commit_message>" + The \<commit_message\> is there to explain to others what the commit does. It + should be written in an imperative style and be concise. An example would be + ``git commit -m "Add images to README"``. 6.
**Push the changes to the fork** - Once we have committed our changes, we have effectively updated our local history, but GitHub has no way of knowing this unless we push - our changes to our origin's remote address: - - .. code-block:: shell + Once we have committed our changes, we have effectively updated our local + history, but GitHub has no way of knowing this unless we push our changes to our + origin's remote address: - $ git push -u origin <branch_name> + .. code-block:: shell - Once this is done, you will see on the GitHub that your forked repo was updated with the changes you have made. + $ git push -u origin <branch_name> + Once this is done, you will see on GitHub that your forked repo was updated + with the changes you have made. Creating and merging a pull request (PR) ---------------------------------------- 1. **Create the PR** - Once you have pushed changes, on the GitHub webpage of your repository you should see the following message: - - .. image:: _static/compare_and_pr.png + Once you have pushed changes, on the GitHub webpage of your repository you should + see the following message: - Otherwise you can always find this option in the ``Branches`` page. + .. image:: _static/compare_and_pr.png - Once you click the ``Compare & pull request`` button, you should see something similar to this: + Otherwise you can always find this option in the ``Branches`` page. - .. image:: _static/creating_pr.png + Once you click the ``Compare & pull request`` button, you should see something + similar to this: - At the top you have an explanation of which branch will be merged where: + .. image:: _static/creating_pr.png - .. image:: _static/merging_branch.png + At the top you have an explanation of which branch will be merged where: - In this example you can see that the request is to merge the branch ``doc-fixes`` from my forked repository to branch ``main`` from the Flower repository. + .. image:: _static/merging_branch.png - The title should be changed to adhere to the :ref:`pr_title_format` guidelines, otherwise it won't be possible to merge the PR. So in this case, - a correct title might be ``docs(framework:skip) Fix typos``. + In this example you can see that the request is to merge the branch ``doc-fixes`` + from my forked repository to branch ``main`` from the Flower repository. - The input box in the middle is there for you to describe what your PR does and to link it to existing issues. - We have placed comments (that won't be rendered once the PR is opened) to guide you through the process. + The title should be changed to adhere to the :ref:`pr_title_format` guidelines, + otherwise it won't be possible to merge the PR. So in this case, a correct title + might be ``docs(framework:skip) Fix typos``. - It is important to follow the instructions described in comments. + The input box in the middle is there for you to describe what your PR does and to + link it to existing issues. We have placed comments (that won't be rendered once + the PR is opened) to guide you through the process. - At the bottom you will find the button to open the PR. This will notify reviewers that a new PR has been opened and - that they should look over it to merge or to request changes. + It is important to follow the instructions described in comments. - If your PR is not yet ready for review, and you don't want to notify anyone, you have the option to create a draft pull request: + At the bottom you will find the button to open the PR.
This will notify reviewers + that a new PR has been opened and that they should look over it to merge or to + request changes. - .. image:: _static/draft_pr.png + If your PR is not yet ready for review, and you don't want to notify anyone, you + have the option to create a draft pull request: + .. image:: _static/draft_pr.png 2. **Making new changes** - Once the PR has been opened (as draft or not), you can still push new commits to it the same way we did before, by making changes to the branch associated with the PR. - + Once the PR has been opened (as draft or not), you can still push new commits to + it the same way we did before, by making changes to the branch associated with + the PR. 3. **Review the PR** - Once the PR has been opened or once the draft PR has been marked as ready, a review from code owners will be automatically requested: - - .. image:: _static/opened_pr.png - - Code owners will then look into the code, ask questions, request changes or validate the PR. + Once the PR has been opened or once the draft PR has been marked as ready, a + review from code owners will be automatically requested: - Merging will be blocked if there are ongoing requested changes. + .. image:: _static/opened_pr.png - .. image:: _static/changes_requested.png + Code owners will then look into the code, ask questions, request changes or + validate the PR. - To resolve them, just push the necessary changes to the branch associated with the PR: + Merging will be blocked if there are ongoing requested changes. - .. image:: _static/make_changes.png + .. image:: _static/changes_requested.png - And resolve the conversation: + To resolve them, just push the necessary changes to the branch associated with + the PR: - .. image:: _static/resolve_conv.png + .. image:: _static/make_changes.png - Once all the conversations have been resolved, you can re-request a review. + And resolve the conversation: + .. image:: _static/resolve_conv.png + Once all the conversations have been resolved, you can re-request a review. 4. **Once the PR is merged** - If all the automatic tests have passed and reviewers have no more changes to request, they can approve the PR and merge it. + If all the automatic tests have passed and reviewers have no more changes to + request, they can approve the PR and merge it. - .. image:: _static/merging_pr.png + .. image:: _static/merging_pr.png - Once it is merged, you can delete the branch on GitHub (a button should appear to do so) and also delete it locally by doing: + Once it is merged, you can delete the branch on GitHub (a button should appear to + do so) and also delete it locally by doing: - .. code-block:: shell + .. code-block:: shell - $ git switch main - $ git branch -D <branch_name> + $ git switch main + $ git branch -D <branch_name> - Then you should update your forked repository by doing: + Then you should update your forked repository by doing: - .. code-block:: shell - - $ git pull upstream main # to update the local repository - $ git push origin main # to push the changes to the remote repository + .. code-block:: shell + $ git pull upstream main # to update the local repository + $ git push origin main # to push the changes to the remote repository Example of first contribution ----------------------------- Problem -******* +~~~~~~~ -For our documentation, we've started to use the `Diàtaxis framework `_. +For our documentation, we've started to use the `Diátaxis framework +<https://diataxis.fr/>`_. -Our "How to" guides should have titles that continue the sentence "How to …", for example, "How to upgrade to Flower 1.0".
+Our "How to" guides should have titles that continue the sentence "How to …", for +example, "How to upgrade to Flower 1.0". -Most of our guides do not follow this new format yet, and changing their title is (unfortunately) more involved than one might think. +Most of our guides do not follow this new format yet, and changing their title is +(unfortunately) more involved than one might think. -This issue is about changing the title of a doc from present continuous to present simple. +This issue is about changing the title of a doc from present continuous to present +simple. -Let's take the example of "Saving Progress" which we changed to "Save Progress". Does this pass our check? +Let's take the example of "Saving Progress" which we changed to "Save Progress". Does +this pass our check? Before: "How to saving progress" ❌ After: "How to save progress" ✅ Solution -******** +~~~~~~~~ -This is a tiny change, but it'll allow us to test your end-to-end setup. After cloning and setting up the Flower repo, here's what you should do: +This is a tiny change, but it'll allow us to test your end-to-end setup. After cloning +and setting up the Flower repo, here's what you should do: - Find the source file in ``doc/source`` -- Make the change in the ``.rst`` file (beware, the dashes under the title should be the same length as the title itself) -- Build the docs and `check the result `_ +- Make the change in the ``.rst`` file (beware, the dashes under the title should be the + same length as the title itself) +- Build the docs and `check the result + `_ Rename file -::::::::::: ++++++++++++ -You might have noticed that the file name still reflects the old wording. -If we just change the file, then we break all existing links to it - it is **very important** to avoid that, breaking links can harm our search engine ranking. +You might have noticed that the file name still reflects the old wording. If we just +change the file, then we break all existing links to it - it is **very important** to +avoid that, breaking links can harm our search engine ranking. Here's how to change the file name: - Change the file name to ``save-progress.rst`` - Add a redirect rule to ``doc/source/conf.py`` -This will cause a redirect from ``saving-progress.html`` to ``save-progress.html``, old links will continue to work. +This will cause a redirect from ``saving-progress.html`` to ``save-progress.html``, old +links will continue to work. Apply changes in the index file -::::::::::::::::::::::::::::::: ++++++++++++++++++++++++++++++++ -For the lateral navigation bar to work properly, it is very important to update the ``index.rst`` file as well. -This is where we define the whole arborescence of the navbar. +For the lateral navigation bar to work properly, it is very important to update the +``index.rst`` file as well. This is where we define the whole arborescence of the +navbar. - Find and modify the file name in ``index.rst`` Open PR -::::::: ++++++++ -- Commit the changes (commit messages are always imperative: "Do something", in this case "Change …") +- Commit the changes (commit messages are always imperative: "Do something", in this + case "Change …") - Push the changes to your fork - Open a PR (as shown above) with title ``docs(framework) Update how-to guide title`` - Wait for it to be approved! - Congrats! 🥳 You're now officially a Flower contributor! 
- Next steps ---------- -Once you have made your first PR, and want to contribute more, be sure to check out the following : - -- :doc:`Good first contributions <contributor-ref-good-first-contributions>`, where you should particularly look into the :code:`baselines` contributions. +Once you have made your first PR, and want to contribute more, be sure to check out the +following: +- :doc:`Good first contributions <contributor-ref-good-first-contributions>`, where you + should particularly look into the ``baselines`` contributions. Appendix -------- @@ -324,7 +359,7 @@ Appendix .. _pr_title_format: PR title format -*************** +~~~~~~~~~~~~~~~ We enforce the following PR title format: @@ -334,9 +369,10 @@ We enforce the following PR title format: (or ``<type>(<project>:skip) <subject>`` to ignore the PR in the changelog) -Where ``<type>`` needs to be in ``{ci, fix, feat, docs, refactor, break}``, ``<project>`` -should be in ``{framework, baselines, datasets, examples, or '*' when modifying multiple projects which requires the ':skip' flag to be used}``, -and ``<subject>`` starts with a capitalised verb in the imperative mood. +Where ``<type>`` needs to be in ``{ci, fix, feat, docs, refactor, break}``, +``<project>`` should be in ``{framework, baselines, datasets, examples, or '*' when +modifying multiple projects which requires the ':skip' flag to be used}``, and +``<subject>`` starts with a capitalised verb in the imperative mood. Valid examples: diff --git a/doc/source/contributor-tutorial-get-started-as-a-contributor.rst b/doc/source/contributor-tutorial-get-started-as-a-contributor.rst index 3dac8647fa33..60b3ebdef743 100644 --- a/doc/source/contributor-tutorial-get-started-as-a-contributor.rst +++ b/doc/source/contributor-tutorial-get-started-as-a-contributor.rst @@ -9,163 +9,187 @@ Prerequisites - (Optional) `pyenv <https://github.com/pyenv/pyenv>`_ - (Optional) `pyenv-virtualenv <https://github.com/pyenv/pyenv-virtualenv>`_ -Flower uses :code:`pyproject.toml` to manage dependencies and configure -development tools (the ones which support it). Poetry is a build tool which -supports `PEP 517 `_. - +Flower uses ``pyproject.toml`` to manage dependencies and configure development tools +(the ones which support it). Poetry is a build tool which supports `PEP 517 +`_. Developer Machine Setup ----------------------- Preliminaries ~~~~~~~~~~~~~ + Some system-wide dependencies are needed. For macOS -^^^^^^^^^ ++++++++++ +- Install `homebrew <https://brew.sh/>`_. Don't forget the post-installation actions to + add `brew` to your PATH. +- Install `xz` (to install different Python versions) and `pandoc` to build the docs: -* Install `homebrew <https://brew.sh/>`_. Don't forget the post-installation actions to add `brew` to your PATH. -* Install `xz` (to install different Python versions) and `pandoc` to build the - docs:: :: - $ brew install xz pandoc + $ brew install xz pandoc For Ubuntu -^^^^^^^^^^ -Ensure you system (Ubuntu 22.04+) is up-to-date, and you have all necessary -packages:: +++++++++++ - $ apt update - $ apt install build-essential zlib1g-dev libssl-dev libsqlite3-dev \ - libreadline-dev libbz2-dev libffi-dev liblzma-dev pandoc +Ensure your system (Ubuntu 22.04+) is up-to-date, and you have all necessary packages: +:: + $ apt update + $ apt install build-essential zlib1g-dev libssl-dev libsqlite3-dev \ + libreadline-dev libbz2-dev libffi-dev liblzma-dev pandoc Create Flower Dev Environment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -1. Clone the `Flower repository <https://github.com/adap/flower>`_ from -GitHub:: - $ git clone git@github.com:adap/flower.git - $ cd flower +1. Clone the `Flower repository <https://github.com/adap/flower>`_ from GitHub: +:: + + $ git clone git@github.com:adap/flower.git + $ cd flower + +2. Let's create the Python environment for all-things Flower.
If you wish to use + ``pyenv``, we provide two convenience scripts that you can use. If you prefer using + something other than ``pyenv``, create a new environment, activate it, and skip to the + last point where all packages are installed. -2. Let's create the Python environment for all-things Flower. If you wish to use :code:`pyenv`, we provide two convenience scripts that you can use. If you prefer using something else than :code:`pyenv`, create a new environment, activate and skip to the last point where all packages are installed. +- If you don't have ``pyenv`` installed, the following script will install it, set + it up, and create the virtual environment (with ``Python 3.9.20`` by default): -* If you don't have :code:`pyenv` installed, the following script that will install it, set it up, and create the virtual environment (with :code:`Python 3.9.20` by default):: + :: - $ ./dev/setup-defaults.sh # once completed, run the bootstrap script + $ ./dev/setup-defaults.sh # once completed, run the bootstrap script -* If you already have :code:`pyenv` installed (along with the :code:`pyenv-virtualenv` plugin), you can use the following convenience script (with :code:`Python 3.9.20` by default):: +- If you already have ``pyenv`` installed (along with the ``pyenv-virtualenv`` plugin), + you can use the following convenience script (with ``Python 3.9.20`` by default): - $ ./dev/venv-create.sh # once completed, run the `bootstrap.sh` script + :: -3. Install the Flower package in development mode (think -:code:`pip install -e`) along with all necessary dependencies:: + $ ./dev/venv-create.sh # once completed, run the `bootstrap.sh` script - (flower-) $ ./dev/bootstrap.sh +3. Install the Flower package in development mode (think ``pip install -e``) along with +all necessary dependencies: + +:: + (flower-) $ ./dev/bootstrap.sh Convenience Scripts ------------------- -The Flower repository contains a number of convenience scripts to make -recurring development tasks easier and less error-prone. See the :code:`/dev` -subdirectory for a full list. The following scripts are amongst the most -important ones: +The Flower repository contains a number of convenience scripts to make recurring +development tasks easier and less error-prone. See the ``/dev`` subdirectory for a full +list. The following scripts are amongst the most important ones: Create/Delete Virtual Environment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :: - $ ./dev/venv-create.sh # Default is 3.9.20 - $ ./dev/venv-delete.sh # Default is 3.9.20 + $ ./dev/venv-create.sh # Default is 3.9.20 + $ ./dev/venv-delete.sh # Default is 3.9.20 Compile ProtoBuf Definitions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :: - $ python -m flwr_tool.protoc + $ python -m flwr_tool.protoc Auto-Format Code ~~~~~~~~~~~~~~~~ :: - $ ./dev/format.sh + $ ./dev/format.sh Run Linters and Tests ~~~~~~~~~~~~~~~~~~~~~ :: - $ ./dev/test.sh + $ ./dev/test.sh Add a pre-commit hook ~~~~~~~~~~~~~~~~~~~~~ -Developers may integrate a pre-commit hook into their workflow utilizing the `pre-commit <https://pre-commit.com/>`_ library. The pre-commit hook is configured to execute two primary operations: ``./dev/format.sh`` and ``./dev/test.sh`` scripts. +Developers may integrate a pre-commit hook into their workflow utilizing the `pre-commit +<https://pre-commit.com/>`_ library. The pre-commit hook is configured to +execute two primary operations: ``./dev/format.sh`` and ``./dev/test.sh`` scripts. There are multiple ways developers can use this: 1.
Install the pre-commit hook to your local git directory by simply running: :: - - $ pre-commit install - - Each ``git commit`` will trigger the execution of formatting and linting/test scripts. - - If in a hurry, bypass the hook using ``--no-verify`` with the ``git commit`` command. + $ pre-commit install + + - Each ``git commit`` will trigger the execution of formatting and linting/test + scripts. + - If in a hurry, bypass the hook using ``--no-verify`` with the ``git commit`` + command. + :: - - $ git commit --no-verify -m "Add new feature" - -2. For developers who prefer not to install the hook permanently, it is possible to execute a one-time check prior to committing changes by using the following command: - + + $ git commit --no-verify -m "Add new feature" + +2. For developers who prefer not to install the hook permanently, it is possible to + execute a one-time check prior to committing changes by using the following command: + :: - $ pre-commit run --all-files - - This executes the formatting and linting checks/tests on all the files without modifying the default behavior of ``git commit``. + $ pre-commit run --all-files + + This executes the formatting and linting checks/tests on all the files without + modifying the default behavior of ``git commit``. Run Github Actions (CI) locally ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Developers can run the full set of GitHub Actions workflows in their local -environment by using `Act <https://github.com/nektos/act>`_. Please refer to -the installation instructions under the linked repository and run the next -command under Flower main cloned repository folder:: +environment by using `Act <https://github.com/nektos/act>`_. Please refer to the +installation instructions in the linked repository and run the next command in the +main folder of your cloned Flower repository: - $ act +:: -The Flower default workflow would run by setting up the required Docker -machines underneath. + $ act +The default Flower workflow will then run, setting up the required Docker machines +underneath. Build Release ------------- -Flower uses Poetry to build releases. The necessary command is wrapped in a -simple script:: +Flower uses Poetry to build releases. The necessary command is wrapped in a simple +script: - $ ./dev/build.sh +:: -The resulting :code:`.whl` and :code:`.tar.gz` releases will be stored in the -:code:`/dist` subdirectory. + $ ./dev/build.sh +The resulting ``.whl`` and ``.tar.gz`` releases will be stored in the ``/dist`` +subdirectory. Build Documentation ------------------- Flower's documentation uses `Sphinx <https://www.sphinx-doc.org/>`_. There's no -convenience script to re-build the documentation yet, but it's pretty easy:: +convenience script to re-build the documentation yet, but it's pretty easy: + +:: - $ cd doc - $ make html + $ cd doc + $ make html This will generate HTML documentation in ``doc/build/html``. -Note that, in order to build the documentation locally -(with ``poetry run make html``, like described below), -`Pandoc <https://pandoc.org/>`_ needs to be installed on the system. +Note that, in order to build the documentation locally (with ``poetry run make html``, +as described below), `Pandoc <https://pandoc.org/>`_ needs to be +installed on the system.
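After the setup and scripts above, a quick sanity check can confirm that the editable install picked up your local checkout before you start making changes (a minimal sketch, assuming the bootstrap script completed and the virtual environment is active):

.. code-block:: shell

    # Should print a path inside your local flower checkout
    $ python -c "import flwr; print(flwr.__file__)"

    # Run the formatter and the test suite once on a clean tree
    $ ./dev/format.sh
    $ ./dev/test.sh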
diff --git a/doc/source/docker/enable-tls.rst b/doc/source/docker/enable-tls.rst index ac604b708f88..f50edb8c651d 100644 --- a/doc/source/docker/enable-tls.rst +++ b/doc/source/docker/enable-tls.rst @@ -1,152 +1,152 @@ Enable TLS for Secure Connections ================================= -When operating in a production environment, it is strongly recommended to enable Transport Layer -Security (TLS) for each Flower Component to ensure secure communication. +When operating in a production environment, it is strongly recommended to enable +Transport Layer Security (TLS) for each Flower Component to ensure secure communication. -To enable TLS, you will need a PEM-encoded root certificate, a PEM-encoded private key and a -PEM-encoded certificate chain. +To enable TLS, you will need a PEM-encoded root certificate, a PEM-encoded private key +and a PEM-encoded certificate chain. .. note:: - For testing purposes, you can generate your own self-signed certificates. The - `Enable SSL connections `__ - page contains a section that will guide you through the process. + For testing purposes, you can generate your own self-signed certificates. The + `Enable SSL connections + `__ + page contains a section that will guide you through the process. +Because Flower containers, by default, run with a non-root user ``app``, the mounted +files and directories must have the proper permissions for the user ID ``49999``. -Because Flower containers, by default, run with a non-root user ``app``, the mounted files and -directories must have the proper permissions for the user ID ``49999``. +For example, to change the user ID of all files in the ``certificates/`` directory, you +can run ``sudo chown -R 49999:49999 certificates/*``. -For example, to change the user ID of all files in the ``certificates/`` directory, you can run -``sudo chown -R 49999:49999 certificates/*``. - -If you later want to delete the directory, you can change the user ID back to the current user -ID by running ``sudo chown -R $USER:$(id -gn) state``. +If you later want to delete the directory, you can change the user ID back to the +current user ID by running ``sudo chown -R $USER:$(id -gn) state``. SuperLink --------- -Assuming all files we need are in the local ``certificates`` directory, we can use the flag -``--volume`` to mount the local directory into the ``/app/certificates/`` directory of the container: +Assuming all files we need are in the local ``certificates`` directory, we can use the +flag ``--volume`` to mount the local directory into the ``/app/certificates/`` directory +of the container: .. code-block:: bash - :substitutions: + :substitutions: - $ docker run --rm \ - --volume ./certificates/:/app/certificates/:ro \ - flwr/superlink:|stable_flwr_version| \ - --ssl-ca-certfile certificates/ca.crt \ - --ssl-certfile certificates/server.pem \ - --ssl-keyfile certificates/server.key + $ docker run --rm \ + --volume ./certificates/:/app/certificates/:ro \ + flwr/superlink:|stable_flwr_version| \ + --ssl-ca-certfile certificates/ca.crt \ + --ssl-certfile certificates/server.pem \ + --ssl-keyfile certificates/server.key .. dropdown:: Understanding the command - * ``docker run``: This tells Docker to run a container from an image. - * ``--rm``: Remove the container once it is stopped or the command exits. 
- * | ``--volume ./certificates/:/app/certificates/:ro``: Mount the ``certificates`` directory in - | the current working directory of the host machine as a read-only volume at the - | ``/app/certificates`` directory inside the container. - | - | This allows the container to access the TLS certificates that are stored in the certificates - | directory. - * | :substitution-code:`flwr/superlink:|stable_flwr_version|`: The name of the image to be run and the specific - | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a specific version of the image. - * | ``--ssl-ca-certfile certificates/ca.crt``: Specify the location of the CA certificate file - | inside the container. - | - | The ``certificates/ca.crt`` file is a certificate that is used to verify the identity of the - | SuperLink. - * | ``--ssl-certfile certificates/server.pem``: Specify the location of the SuperLink's - | TLS certificate file inside the container. - | - | The ``certificates/server.pem`` file is used to identify the SuperLink and to encrypt the - | data that is transmitted over the network. - * | ``--ssl-keyfile certificates/server.key``: Specify the location of the SuperLink's - | TLS private key file inside the container. - | - | The ``certificates/server.key`` file is used to decrypt the data that is transmitted over - | the network. + * ``docker run``: This tells Docker to run a container from an image. + * ``--rm``: Remove the container once it is stopped or the command exits. + * | ``--volume ./certificates/:/app/certificates/:ro``: Mount the ``certificates`` directory in + | the current working directory of the host machine as a read-only volume at the + | ``/app/certificates`` directory inside the container. + | + | This allows the container to access the TLS certificates that are stored in the certificates + | directory. + * | :substitution-code:`flwr/superlink:|stable_flwr_version|`: The name of the image to be run and the specific + | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a specific version of the image. + * | ``--ssl-ca-certfile certificates/ca.crt``: Specify the location of the CA certificate file + | inside the container. + | + | The ``certificates/ca.crt`` file is a certificate that is used to verify the identity of the + | SuperLink. + * | ``--ssl-certfile certificates/server.pem``: Specify the location of the SuperLink's + | TLS certificate file inside the container. + | + | The ``certificates/server.pem`` file is used to identify the SuperLink and to encrypt the + | data that is transmitted over the network. + * | ``--ssl-keyfile certificates/server.key``: Specify the location of the SuperLink's + | TLS private key file inside the container. + | + | The ``certificates/server.key`` file is used to decrypt the data that is transmitted over + | the network. SuperNode --------- -Assuming that the ``ca.crt`` certificate already exists locally, we can use the flag ``--volume`` to mount the local -certificate into the container's ``/app/`` directory. +Assuming that the ``ca.crt`` certificate already exists locally, we can use the flag +``--volume`` to mount the local certificate into the container's ``/app/`` directory. .. note:: - If you're generating self-signed certificates and the ``ca.crt`` certificate doesn't exist - on the SuperNode, you can copy it over after the generation step. 
+ If you're generating self-signed certificates and the ``ca.crt`` certificate doesn't + exist on the SuperNode, you can copy it over after the generation step. .. code-block:: bash - :substitutions: + :substitutions: - $ docker run --rm \ - --volume ./ca.crt:/app/ca.crt/:ro \ - flwr/supernode:|stable_flwr_version| \ - --root-certificates ca.crt + $ docker run --rm \ + --volume ./ca.crt:/app/ca.crt/:ro \ + flwr/supernode:|stable_flwr_version| \ + --root-certificates ca.crt .. dropdown:: Understanding the command - * ``docker run``: This tells Docker to run a container from an image. - * ``--rm``: Remove the container once it is stopped or the command exits. - * | ``--volume ./ca.crt:/app/ca.crt/:ro``: Mount the ``ca.crt`` file from the - | current working directory of the host machine as a read-only volume at the ``/app/ca.crt`` - | directory inside the container. - * | :substitution-code:`flwr/supernode:|stable_flwr_version|`: The name of the image to be run and the specific - | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a specific version of the image. - * | ``--root-certificates ca.crt``: This specifies the location of the CA certificate file - | inside the container. - | - | The ``ca.crt`` file is used to verify the identity of the SuperLink. - + * ``docker run``: This tells Docker to run a container from an image. + * ``--rm``: Remove the container once it is stopped or the command exits. + * | ``--volume ./ca.crt:/app/ca.crt/:ro``: Mount the ``ca.crt`` file from the + | current working directory of the host machine as a read-only volume at the ``/app/ca.crt`` + | directory inside the container. + * | :substitution-code:`flwr/supernode:|stable_flwr_version|`: The name of the image to be run and the specific + | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a specific version of the image. + * | ``--root-certificates ca.crt``: This specifies the location of the CA certificate file + | inside the container. + | + | The ``ca.crt`` file is used to verify the identity of the SuperLink. SuperExec --------- -Assuming all files we need are in the local ``certificates`` directory where the SuperExec will be executed from, we can use the flag -``--volume`` to mount the local directory into the ``/app/certificates/`` directory of the container: +Assuming all files we need are in the local ``certificates`` directory where the +SuperExec will be executed from, we can use the flag ``--volume`` to mount the local +directory into the ``/app/certificates/`` directory of the container: .. code-block:: bash - :substitutions: - - $ docker run --rm \ - --volume ./certificates/:/app/certificates/:ro \ - flwr/superexec:|stable_flwr_version| \ - --ssl-ca-certfile certificates/ca.crt \ - --ssl-certfile certificates/server.pem \ - --ssl-keyfile certificates/server.key \ - --executor-config \ - root-certificates=\"certificates/superlink_ca.crt\" + :substitutions: + $ docker run --rm \ + --volume ./certificates/:/app/certificates/:ro \ + flwr/superexec:|stable_flwr_version| \ + --ssl-ca-certfile certificates/ca.crt \ + --ssl-certfile certificates/server.pem \ + --ssl-keyfile certificates/server.key \ + --executor-config \ + root-certificates=\"certificates/superlink_ca.crt\" .. dropdown:: Understanding the command - * ``docker run``: This tells Docker to run a container from an image. - * ``--rm``: Remove the container once it is stopped or the command exits. 
- * | ``--volume ./certificates/:/app/certificates/:ro``: Mount the ``certificates`` directory in - | the current working directory of the host machine as a read-only volume at the - | ``/app/certificates`` directory inside the container. - | - | This allows the container to access the TLS certificates that are stored in the certificates - | directory. - * | :substitution-code:`flwr/superexec:|stable_flwr_version|`: The name of the image to be run and the specific - | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a specific version of the image. - * | ``--ssl-ca-certfile certificates/ca.crt``: Specify the location of the CA certificate file - | inside the container. - | - | The ``certificates/ca.crt`` file is a certificate that is used to verify the identity of the - | SuperExec. - * | ``--ssl-certfile certificates/server.pem``: Specify the location of the SuperExec's - | TLS certificate file inside the container. - | - | The ``certificates/server.pem`` file is used to identify the SuperExec and to encrypt the - | data that is transmitted over the network. - * | ``--ssl-keyfile certificates/server.key``: Specify the location of the SuperExec's - | TLS private key file inside the container. - | - | The ``certificates/server.key`` file is used to decrypt the data that is transmitted over - | the network. - * | ``--executor-config root-certificates=\"certificates/superlink_ca.crt\"``: Specify the - | location of the CA certificate file inside the container that the SuperExec executor - | should use to verify the SuperLink's identity. + * ``docker run``: This tells Docker to run a container from an image. + * ``--rm``: Remove the container once it is stopped or the command exits. + * | ``--volume ./certificates/:/app/certificates/:ro``: Mount the ``certificates`` directory in + | the current working directory of the host machine as a read-only volume at the + | ``/app/certificates`` directory inside the container. + | + | This allows the container to access the TLS certificates that are stored in the certificates + | directory. + * | :substitution-code:`flwr/superexec:|stable_flwr_version|`: The name of the image to be run and the specific + | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a specific version of the image. + * | ``--ssl-ca-certfile certificates/ca.crt``: Specify the location of the CA certificate file + | inside the container. + | + | The ``certificates/ca.crt`` file is a certificate that is used to verify the identity of the + | SuperExec. + * | ``--ssl-certfile certificates/server.pem``: Specify the location of the SuperExec's + | TLS certificate file inside the container. + | + | The ``certificates/server.pem`` file is used to identify the SuperExec and to encrypt the + | data that is transmitted over the network. + * | ``--ssl-keyfile certificates/server.key``: Specify the location of the SuperExec's + | TLS private key file inside the container. + | + | The ``certificates/server.key`` file is used to decrypt the data that is transmitted over + | the network. + * | ``--executor-config root-certificates=\"certificates/superlink_ca.crt\"``: Specify the + | location of the CA certificate file inside the container that the SuperExec executor + | should use to verify the SuperLink's identity. 
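For local experiments, the ``ca.crt``, ``server.pem`` and ``server.key`` files mounted above can be generated with OpenSSL along the following lines (a minimal self-signed sketch with placeholder subject names; production deployments need certificates with proper subject alternative names, and the Enable SSL connections page referenced at the top of this guide remains the authoritative walkthrough):

.. code-block:: bash

    # Create a self-signed CA (produces ca.key and ca.crt)
    $ openssl req -x509 -newkey rsa:4096 -days 365 -nodes \
        -keyout certificates/ca.key -out certificates/ca.crt \
        -subj "/CN=Flower Test CA"

    # Create the server key and a certificate signing request
    $ openssl req -newkey rsa:4096 -nodes \
        -keyout certificates/server.key -out certificates/server.csr \
        -subj "/CN=localhost"

    # Sign the server certificate with the CA
    $ openssl x509 -req -in certificates/server.csr -days 365 \
        -CA certificates/ca.crt -CAkey certificates/ca.key \
        -CAcreateserial -out certificates/server.pem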
diff --git a/doc/source/docker/index.rst b/doc/source/docker/index.rst index 2c972b805930..3fd391114dc1 100644 --- a/doc/source/docker/index.rst +++ b/doc/source/docker/index.rst @@ -1,48 +1,48 @@ Run Flower using Docker ======================= -Start your Flower journey with our pre-made Docker images on Docker Hub, supporting ``amd64`` -and ``arm64v8`` architectures. +Start your Flower journey with our pre-made Docker images on Docker Hub, supporting +``amd64`` and ``arm64v8`` architectures. -Our Quickstart guide walks you through containerizing a Flower project and running it end to -end using Docker. +Our Quickstart guide walks you through containerizing a Flower project and running it +end to end using Docker. Getting Started --------------- .. toctree:: - :maxdepth: 1 - - tutorial-quickstart-docker + :maxdepth: 1 + tutorial-quickstart-docker Running in Production --------------------- .. toctree:: - :maxdepth: 1 + :maxdepth: 1 - enable-tls - persist-superlink-state + enable-tls + persist-superlink-state Advanced Options ---------------- .. toctree:: - :maxdepth: 1 + :maxdepth: 1 - set-environment-variables - run-as-root-user - run-as-subprocess - pin-version - use-a-different-version + set-environment-variables + run-as-root-user + run-as-subprocess + pin-version + use-a-different-version Run Flower using Docker Compose ------------------------------- .. toctree:: - :maxdepth: 1 - tutorial-quickstart-docker-compose - run-quickstart-examples-docker-compose - tutorial-deploy-on-multiple-machines + :maxdepth: 1 + + tutorial-quickstart-docker-compose + run-quickstart-examples-docker-compose + tutorial-deploy-on-multiple-machines diff --git a/doc/source/docker/persist-superlink-state.rst b/doc/source/docker/persist-superlink-state.rst index 68e04ed33762..214e408c44c3 100644 --- a/doc/source/docker/persist-superlink-state.rst +++ b/doc/source/docker/persist-superlink-state.rst @@ -1,39 +1,40 @@ Persist the State of the SuperLink ================================== -By default, the Flower SuperLink keeps its state in-memory. When using the Docker flag ``--rm``, the -state is not persisted between container starts. +By default, the Flower SuperLink keeps its state in memory. When using the Docker flag +``--rm``, the state is not persisted between container starts. -If you want to persist the state of the SuperLink on your host system, all you need to do is specify -a directory where you want to save the file on your host system and a name for the database file. +If you want to persist the state of the SuperLink on your host system, all you need to +do is specify a directory on your host system where you want to save the file and a name +for the database file. -By default, the SuperLink container runs with a non-root user called ``app`` with the user ID -``49999``. It is recommended to create a new directory and change the user ID of the directory to -``49999`` to ensure the mounted directory has the proper permissions. +By default, the SuperLink container runs with a non-root user called ``app`` with the +user ID ``49999``. It is recommended to create a new directory and change the user ID of +the directory to ``49999`` to ensure the mounted directory has the proper permissions. -If you later want to delete the directory, you can change the user ID back to the current user -ID by running ``sudo chown -R $USER:$(id -gn) state``. +If you later want to delete the directory, you can change the user ID back to the +current user ID by running ``sudo chown -R $USER:$(id -gn) state``.
Example ------- -In the example below, we create a new directory called ``state``, change the user ID and tell -Docker via the flag ``--volume`` to mount the local ``state`` directory into the ``/app/state`` -directory of the container. Lastly, we use the flag ``--database`` to specify the name of the -database file. +In the example below, we create a new directory called ``state``, change the user ID and +tell Docker via the flag ``--volume`` to mount the local ``state`` directory into the +``/app/state`` directory of the container. Lastly, we use the flag ``--database`` to +specify the name of the database file. .. code-block:: bash - :substitutions: - - $ mkdir state - $ sudo chown -R 49999:49999 state - $ docker run --rm \ - --volume ./state/:/app/state flwr/superlink:|stable_flwr_version| \ - --database state.db \ - ... - -As soon as the SuperLink starts, the file ``state.db`` is created in the ``state`` directory on -your host system. If the file already exists, the SuperLink tries to restore the state from the -file. To start the SuperLink with an empty database, ensure that there is no database -called ``state.db`` in the ``state`` directory (``rm state.db``) before you execute the -``docker run`` command above. + :substitutions: + + $ mkdir state + $ sudo chown -R 49999:49999 state + $ docker run --rm \ + --volume ./state/:/app/state flwr/superlink:|stable_flwr_version| \ + --database state.db \ + ... + +As soon as the SuperLink starts, the file ``state.db`` is created in the ``state`` +directory on your host system. If the file already exists, the SuperLink tries to +restore the state from the file. To start the SuperLink with an empty database, ensure +that there is no database called ``state.db`` in the ``state`` directory (``rm +state.db``) before you execute the ``docker run`` command above. diff --git a/doc/source/docker/pin-version.rst b/doc/source/docker/pin-version.rst index 800e3ed95423..4a69860aa428 100644 --- a/doc/source/docker/pin-version.rst +++ b/doc/source/docker/pin-version.rst @@ -1,10 +1,11 @@ Pin a Docker Image to a Specific Version ======================================== -It may happen that we update the images behind the tags. Such updates usually include security -updates of system dependencies that should not change the functionality of Flower. However, if -you want to ensure that you use a fixed version of the Docker image in your deployments, you can -`specify the digest `_ +It may happen that we update the images behind the tags. Such updates usually include +security updates of system dependencies that should not change the functionality of +Flower. However, if you want to ensure that you use a fixed version of the Docker image +in your deployments, you can `specify the digest +`_ of the image instead of the tag. Example @@ -14,23 +15,23 @@ The following command returns the current image digest referenced by the :substitution-code:`superlink:|stable_flwr_version|` tag: .. code-block:: bash - :substitutions: + :substitutions: - $ docker pull flwr/superlink:|stable_flwr_version| - $ docker inspect --format='{{index .RepoDigests 0}}' flwr/superlink:|stable_flwr_version| + $ docker pull flwr/superlink:|stable_flwr_version| + $ docker inspect --format='{{index .RepoDigests 0}}' flwr/superlink:|stable_flwr_version| This will output .. 
code-block:: bash
-   :substitutions:
+    :substitutions:
 
-   flwr/superlink@sha256:|stable__flwr_superlink_docker_digest|
+    flwr/superlink@sha256:|stable__flwr_superlink_docker_digest|
 
 Next, we can pin the digest when running a new SuperLink container:
 
 .. code-block:: bash
-   :substitutions:
+    :substitutions:
 
-   $ docker run \
-       --rm flwr/superlink@sha256:|latest_version_docker_sha| \
-       [OPTIONS]
+    $ docker run \
+        --rm flwr/superlink@sha256:|latest_version_docker_sha| \
+        [OPTIONS]
diff --git a/doc/source/docker/run-as-root-user.rst b/doc/source/docker/run-as-root-user.rst
index d1b41a9b6168..5f8e5eae43af 100644
--- a/doc/source/docker/run-as-root-user.rst
+++ b/doc/source/docker/run-as-root-user.rst
@@ -2,11 +2,11 @@ Run with Root User Privileges
 =============================
 
 Flower Docker images, by default, run with a non-root user (username/groupname: ``app``,
-UID/GID: ``49999``). Using root user is **not recommended** unless it is necessary for specific
-tasks during the build process.
+UID/GID: ``49999``). Using the root user is **not recommended** unless it is necessary
+for specific tasks during the build process.
 
-Always make sure to run the container as a non-root user in production to maintain security
-best practices.
+Always make sure to run the container as a non-root user in production to maintain
+security best practices.
 
 Run a Container with Root User Privileges
 -----------------------------------------
@@ -14,32 +14,33 @@ Run a Container with Root User Privileges
 
 Run the Docker image with the ``-u`` flag and specify ``root`` as the username:
 
 .. code-block:: bash
-   :substitutions:
+    :substitutions:
 
-   $ docker run --rm -u root flwr/superlink:|stable_flwr_version|
+    $ docker run --rm -u root flwr/superlink:|stable_flwr_version|
 
 This command will run the Docker container with root user privileges.
 
 Run the Build Process with Root User Privileges
 -----------------------------------------------
 
-If you want to switch to the root user during the build process of the Docker image to install
-missing system dependencies, you can use the ``USER root`` directive within your Dockerfile.
+If you want to switch to the root user during the build process of the Docker image to
+install missing system dependencies, you can use the ``USER root`` directive within your
+Dockerfile.
 
 .. code-block:: dockerfile
-   :caption: SuperNode Dockerfile
-   :substitutions:
+    :caption: SuperNode Dockerfile
+    :substitutions:
 
-   FROM flwr/supernode:|stable_flwr_version|
+    FROM flwr/supernode:|stable_flwr_version|
 
-   # Switch to root user
-   USER root
+    # Switch to root user
+    USER root
 
-   # Install missing dependencies (requires root access)
-   RUN apt-get update && apt-get install -y 
+    # Install missing dependencies (requires root access)
+    RUN apt-get update && apt-get install -y 
 
-   # Switch back to non-root user app
-   USER app
+    # Switch back to non-root user app
+    USER app
 
-   # Continue with your Docker image build process
-   # ...
+    # Continue with your Docker image build process
+    # ...
diff --git a/doc/source/docker/run-as-subprocess.rst b/doc/source/docker/run-as-subprocess.rst
index f8c482f632a0..d97319ff52af 100644
--- a/doc/source/docker/run-as-subprocess.rst
+++ b/doc/source/docker/run-as-subprocess.rst
@@ -1,53 +1,53 @@
 Run ClientApp as a Subprocess
 =============================
 
-In this mode, the ClientApp is executed as a subprocess within the SuperNode Docker container,
-rather than running in a separate container.
This approach reduces the number of running containers,
-which can be beneficial for environments with limited resources. However, it also means that the
-ClientApp is no longer isolated from the SuperNode, which may introduce additional security
-concerns.
+In this mode, the ClientApp is executed as a subprocess within the SuperNode Docker
+container, rather than running in a separate container. This approach reduces the number
+of running containers, which can be beneficial for environments with limited resources.
+However, it also means that the ClientApp is no longer isolated from the SuperNode,
+which may introduce additional security concerns.
 
 Prerequisites
 -------------
 
-#. Before running the ClientApp as a subprocess, ensure that the FAB dependencies have been installed
-   in the SuperNode images. This can be done by extending the SuperNode image:
+1. Before running the ClientApp as a subprocess, ensure that the FAB dependencies have
+   been installed in the SuperNode images. This can be done by extending the SuperNode
+   image:
 
    .. code-block:: dockerfile
-      :caption: Dockerfile.supernode
-      :linenos:
-      :substitutions:
+       :caption: Dockerfile.supernode
+       :linenos:
+       :substitutions:
 
-      FROM flwr/supernode:|stable_flwr_version|
+       FROM flwr/supernode:|stable_flwr_version|
 
-      WORKDIR /app
-      COPY pyproject.toml .
-      RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \
-          && python -m pip install -U --no-cache-dir .
+       WORKDIR /app
+       COPY pyproject.toml .
+       RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \
+           && python -m pip install -U --no-cache-dir .
 
-      ENTRYPOINT ["flower-supernode"]
+       ENTRYPOINT ["flower-supernode"]
 
-#. Next, build the SuperNode Docker image by running the following command in the directory where
-   Dockerfile is located:
+2. Next, build the SuperNode Docker image by running the following command in the
+   directory where the Dockerfile is located:
 
    .. code-block:: shell
 
-      $ docker build -f Dockerfile.supernode -t flwr_supernode:0.0.1 .
-
+       $ docker build -f Dockerfile.supernode -t flwr_supernode:0.0.1 .
 
 Run the ClientApp as a Subprocess
 ---------------------------------
 
-Start the SuperNode with the flag ``--isolation subprocess``, which tells the SuperNode to execute
-the ClientApp as a subprocess:
+Start the SuperNode with the flag ``--isolation subprocess``, which tells the SuperNode
+to execute the ClientApp as a subprocess:
 
 .. code-block:: shell
 
-   $ docker run --rm \
-       --detach \
-       flwr_supernode:0.0.1 \
-       --insecure \
-       --superlink superlink:9092 \
-       --node-config "partition-id=1 num-partitions=2" \
-       --supernode-address localhost:9094 \
-       --isolation subprocess
+    $ docker run --rm \
+        --detach \
+        flwr_supernode:0.0.1 \
+        --insecure \
+        --superlink superlink:9092 \
+        --node-config "partition-id=1 num-partitions=2" \
+        --supernode-address localhost:9094 \
+        --isolation subprocess
diff --git a/doc/source/docker/run-quickstart-examples-docker-compose.rst b/doc/source/docker/run-quickstart-examples-docker-compose.rst
index 5bdb33e991dd..b31f0035e143 100644
--- a/doc/source/docker/run-quickstart-examples-docker-compose.rst
+++ b/doc/source/docker/run-quickstart-examples-docker-compose.rst
@@ -1,15 +1,16 @@
 Run Flower Quickstart Examples with Docker Compose
 ==================================================
 
-Flower provides a set of `quickstart examples `_
-to help you get started with the framework. These examples are designed to demonstrate the
-capabilities of Flower and by default run using the Simulation Engine.
This guide demonstrates -how to run them using Flower's Deployment Engine via Docker Compose. +Flower provides a set of `quickstart examples +`_ to help you get started with the +framework. These examples are designed to demonstrate the capabilities of Flower and by +default run using the Simulation Engine. This guide demonstrates how to run them using +Flower's Deployment Engine via Docker Compose. .. important:: - Some quickstart examples may have limitations or requirements that prevent them from running - on every environment. For more information, please see `Limitations`_. + Some quickstart examples may have limitations or requirements that prevent them from + running on every environment. For more information, please see Limitations_. Prerequisites ------------- @@ -23,68 +24,71 @@ Before you start, make sure that: Run the Quickstart Example -------------------------- -#. Clone the quickstart example you like to run. For example, ``quickstart-pytorch``: +1. Clone the quickstart example you like to run. For example, ``quickstart-pytorch``: .. code-block:: bash - $ git clone --depth=1 https://github.com/adap/flower.git \ - && mv flower/examples/quickstart-pytorch . \ - && rm -rf flower && cd quickstart-pytorch + $ git clone --depth=1 https://github.com/adap/flower.git \ + && mv flower/examples/quickstart-pytorch . \ + && rm -rf flower && cd quickstart-pytorch -#. Download the `compose.yml `_ file into the example directory: +2. Download the `compose.yml + `_ file + into the example directory: .. code-block:: bash - $ curl https://raw.githubusercontent.com/adap/flower/refs/heads/main/src/docker/complete/compose.yml \ - -o compose.yml + $ curl https://raw.githubusercontent.com/adap/flower/refs/heads/main/src/docker/complete/compose.yml \ + -o compose.yml -#. Build and start the services using the following command: +3. Build and start the services using the following command: .. code-block:: bash - $ docker compose up --build -d + $ docker compose up --build -d -#. Append the following lines to the end of the ``pyproject.toml`` file and save it: +4. Append the following lines to the end of the ``pyproject.toml`` file and save it: .. code-block:: toml - :caption: pyproject.toml + :caption: pyproject.toml - [tool.flwr.federations.local-deployment] - address = "127.0.0.1:9093" - insecure = true + [tool.flwr.federations.local-deployment] + address = "127.0.0.1:9093" + insecure = true .. note:: - You can customize the string that follows ``tool.flwr.federations.`` to fit your needs. - However, please note that the string cannot contain a dot (``.``). + You can customize the string that follows ``tool.flwr.federations.`` to fit your + needs. However, please note that the string cannot contain a dot (``.``). - In this example, ``local-deployment`` has been used. Just remember to replace - ``local-deployment`` with your chosen name in both the ``tool.flwr.federations.`` string - and the corresponding ``flwr run .`` command. + In this example, ``local-deployment`` has been used. Just remember to replace + ``local-deployment`` with your chosen name in both the ``tool.flwr.federations.`` + string and the corresponding ``flwr run .`` command. -#. Run the example: +5. Run the example: .. code-block:: bash - $ flwr run . local-deployment + $ flwr run . local-deployment -#. Follow the logs of the SuperExec service: +6. Follow the logs of the SuperExec service: .. code-block:: bash - $ docker compose logs superexec -f + $ docker compose logs superexec -f -That is all it takes! 
You can monitor the progress of the run through the logs of the SuperExec.
+That is all it takes! You can monitor the progress of the run through the logs of the
+SuperExec.
 
 Run a Different Quickstart Example
 ----------------------------------
 
-To run a different quickstart example, such as ``quickstart-tensorflow``, first, shut down the Docker
-Compose services of the current example:
+To run a different quickstart example, such as ``quickstart-tensorflow``, first shut
+down the Docker Compose services of the current example:
 
 .. code-block:: bash
 
-   $ docker compose down
+    $ docker compose down
 
 After that, you can repeat the steps above.
 
@@ -92,31 +96,32 @@ Limitations
 -----------
 
 .. list-table::
-   :header-rows: 1
-
-   * - Quickstart Example
-     - Limitations
-   * - quickstart-fastai
-     - None
-   * - quickstart-huggingface
-     - None
-   * - quickstart-jax
-     - The example has not yet been updated to work with the latest ``flwr`` version.
-   * - quickstart-mlcube
-     - The example has not yet been updated to work with the latest ``flwr`` version.
-   * - quickstart-mlx
-     - `Requires to run on macOS with Apple Silicon `_.
-   * - quickstart-monai
-     - None
-   * - quickstart-pandas
-     - None
-   * - quickstart-pytorch-lightning
-     - Requires an older pip version that is not supported by the Flower Docker images.
-   * - quickstart-pytorch
-     - None
-   * - quickstart-sklearn-tabular
-     - None
-   * - quickstart-tabnet
-     - The example has not yet been updated to work with the latest ``flwr`` version.
-   * - quickstart-tensorflow
-     - Only runs on AMD64.
+    :header-rows: 1
+
+    - - Quickstart Example
+      - Limitations
+    - - quickstart-fastai
+      - None
+    - - quickstart-huggingface
+      - None
+    - - quickstart-jax
+      - The example has not yet been updated to work with the latest ``flwr`` version.
+    - - quickstart-mlcube
+      - The example has not yet been updated to work with the latest ``flwr`` version.
+    - - quickstart-mlx
+      - `Requires running on macOS with Apple Silicon
+        `_.
+    - - quickstart-monai
+      - None
+    - - quickstart-pandas
+      - None
+    - - quickstart-pytorch-lightning
+      - Requires an older pip version that is not supported by the Flower Docker images.
+    - - quickstart-pytorch
+      - None
+    - - quickstart-sklearn-tabular
+      - None
+    - - quickstart-tabnet
+      - The example has not yet been updated to work with the latest ``flwr`` version.
+    - - quickstart-tensorflow
+      - Only runs on AMD64.
diff --git a/doc/source/docker/set-environment-variables.rst b/doc/source/docker/set-environment-variables.rst
index ff8d6dde0a29..f5d860812bab 100644
--- a/doc/source/docker/set-environment-variables.rst
+++ b/doc/source/docker/set-environment-variables.rst
@@ -8,7 +8,7 @@ Example
 -------
 
 ..
code-block:: bash - :substitutions: + :substitutions: - $ docker run -e FLWR_TELEMETRY_ENABLED=0 -e FLWR_TELEMETRY_LOGGING=0 \ - --rm flwr/superlink:|stable_flwr_version| + $ docker run -e FLWR_TELEMETRY_ENABLED=0 -e FLWR_TELEMETRY_LOGGING=0 \ + --rm flwr/superlink:|stable_flwr_version| diff --git a/doc/source/docker/tutorial-deploy-on-multiple-machines.rst b/doc/source/docker/tutorial-deploy-on-multiple-machines.rst index 7b6cec8292eb..72958c926ba9 100644 --- a/doc/source/docker/tutorial-deploy-on-multiple-machines.rst +++ b/doc/source/docker/tutorial-deploy-on-multiple-machines.rst @@ -1,191 +1,171 @@ -######################################################## - Deploy Flower on Multiple Machines with Docker Compose -######################################################## - -This guide will help you set up a Flower project on multiple machines -using Docker Compose. - -You will learn how to run the Flower client and server components on two -separate machines, with Flower configured to use TLS encryption and -persist SuperLink state across restarts. A server consists of a -SuperLink and ``SuperExec``. For more details about the Flower -architecture, refer to the :doc:`../explanation-flower-architecture` +Deploy Flower on Multiple Machines with Docker Compose +====================================================== + +This guide will help you set up a Flower project on multiple machines using Docker +Compose. + +You will learn how to run the Flower client and server components on two separate +machines, with Flower configured to use TLS encryption and persist SuperLink state +across restarts. A server consists of a SuperLink and ``SuperExec``. For more details +about the Flower architecture, refer to the :doc:`../explanation-flower-architecture` explainer page. -This guide assumes you have completed the -:doc:`tutorial-quickstart-docker-compose` tutorial. It is highly -recommended that you follow and understand the contents of that tutorial -before proceeding with this guide. +This guide assumes you have completed the :doc:`tutorial-quickstart-docker-compose` +tutorial. It is highly recommended that you follow and understand the contents of that +tutorial before proceeding with this guide. -*************** - Prerequisites -*************** +Prerequisites +------------- Before you begin, make sure you have the following prerequisites: -- The ``flwr`` CLI is :doc:`installed <../how-to-install-flower>` - locally. -- The Docker daemon is running on your local machine and the remote - machine. -- Docker Compose V2 is installed on both your local machine and the - remote machine. -- You can connect to the remote machine from your local machine. -- Ports ``9091`` and ``9093`` are accessible on the remote machine. +- The ``flwr`` CLI is :doc:`installed <../how-to-install-flower>` locally. +- The Docker daemon is running on your local machine and the remote machine. +- Docker Compose V2 is installed on both your local machine and the remote machine. +- You can connect to the remote machine from your local machine. +- Ports ``9091`` and ``9093`` are accessible on the remote machine. .. note:: - The guide uses the |quickstart_sklearn_tabular|_ example as an - example project. + The guide uses the |quickstart_sklearn_tabular|_ example as an example project. - If your project has a different name or location, please remember to - adjust the commands/paths accordingly. + If your project has a different name or location, please remember to adjust the + commands/paths accordingly. 
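+
+Optionally, you can check beforehand that the two ports are reachable from your local
+machine. The sketch below is one common way to do this, assuming ``nc`` (netcat) is
+installed and the remote IP is ``192.168.2.33``, as in the example used later in this
+guide:
+
+.. code-block:: bash
+
+    $ nc -zv 192.168.2.33 9091
+    $ nc -zv 192.168.2.33 9093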
-**************** - Step 1: Set Up -**************** +Step 1: Set Up +-------------- -#. Clone the Flower repository and change to the ``distributed`` - directory: +1. Clone the Flower repository and change to the ``distributed`` directory: - .. code:: bash + .. code-block:: bash - $ git clone --depth=1 https://github.com/adap/flower.git - $ cd flower/src/docker/distributed + $ git clone --depth=1 https://github.com/adap/flower.git + $ cd flower/src/docker/distributed -#. Get the IP address from the remote machine and save it for later. - -#. Use the ``certs.yml`` Compose file to generate your own self-signed - certificates. If you have certificates, you can continue with Step 2. +2. Get the IP address from the remote machine and save it for later. +3. Use the ``certs.yml`` Compose file to generate your own self-signed certificates. If + you have certificates, you can continue with Step 2. .. important:: - These certificates should be used only for development purposes. + These certificates should be used only for development purposes. - For production environments, you may have to use dedicated - services to obtain your certificates. + For production environments, you may have to use dedicated services to obtain + your certificates. - First, set the environment variables ``SUPERLINK_IP`` and - ``SUPEREXEC_IP`` with the IP address from the remote machine. For - example, if the IP is ``192.168.2.33``, execute: + First, set the environment variables ``SUPERLINK_IP`` and ``SUPEREXEC_IP`` with the + IP address from the remote machine. For example, if the IP is ``192.168.2.33``, + execute: - .. code:: bash + .. code-block:: bash - $ export SUPERLINK_IP=192.168.2.33 - $ export SUPEREXEC_IP=192.168.2.33 + $ export SUPERLINK_IP=192.168.2.33 + $ export SUPEREXEC_IP=192.168.2.33 Next, generate the self-signed certificates: - .. code:: bash + .. code-block:: bash - $ docker compose -f certs.yml -f ../complete/certs.yml up --build + $ docker compose -f certs.yml -f ../complete/certs.yml up --build -*************************************** - Step 2: Copy the Server Compose Files -*************************************** +Step 2: Copy the Server Compose Files +------------------------------------- -Use the method that works best for you to copy the ``server`` directory, -the certificates, and your Flower project to the remote machine. +Use the method that works best for you to copy the ``server`` directory, the +certificates, and your Flower project to the remote machine. For example, you can use ``scp`` to copy the directories: -.. code:: bash +.. code-block:: bash - $ scp -r ./server \ - ./superexec-certificates \ - ./superlink-certificates \ - ../../../examples/quickstart-sklearn-tabular remote:~/distributed + $ scp -r ./server \ + ./superexec-certificates \ + ./superlink-certificates \ + ../../../examples/quickstart-sklearn-tabular remote:~/distributed -******************************************** - Step 3: Start the Flower Server Components -******************************************** +Step 3: Start the Flower Server Components +------------------------------------------ -Log into the remote machine using ``ssh`` and run the following command -to start the SuperLink and SuperExec services: +Log into the remote machine using ``ssh`` and run the following command to start the +SuperLink and SuperExec services: -.. code:: bash +.. 
code-block:: bash
 
-   $ ssh 
-   # In your remote machine
-   $ cd 
-   $ export PROJECT_DIR=../quickstart-sklearn-tabular
-   $ docker compose -f server/compose.yml up --build -d
+    $ ssh 
+    # In your remote machine
+    $ cd 
+    $ export PROJECT_DIR=../quickstart-sklearn-tabular
+    $ docker compose -f server/compose.yml up --build -d
 
 .. note::
 
-   The Path of the ``PROJECT_DIR`` should be relative to the location of
-   the ``server`` Docker Compose files.
+    The path of the ``PROJECT_DIR`` should be relative to the location of the ``server``
+    Docker Compose files.
 
 Go back to your terminal on your local machine.
 
-********************************************
- Step 4: Start the Flower Client Components
-********************************************
+Step 4: Start the Flower Client Components
+------------------------------------------
 
-On your local machine, run the following command to start the client
-components:
+On your local machine, run the following command to start the client components:
 
-.. code:: bash
+.. code-block:: bash
 
-   # In the `docker/distributed` directory
-   $ export PROJECT_DIR=../../../../examples/quickstart-sklearn-tabular
-   $ docker compose -f client/compose.yml up --build -d
+    # In the `docker/distributed` directory
+    $ export PROJECT_DIR=../../../../examples/quickstart-sklearn-tabular
+    $ docker compose -f client/compose.yml up --build -d
 
 .. note::
 
-   The Path of the ``PROJECT_DIR`` should be relative to the location of
-   the ``client`` Docker Compose files.
+    The path of the ``PROJECT_DIR`` should be relative to the location of the ``client``
+    Docker Compose files.
 
-*********************************
- Step 5: Run Your Flower Project
-*********************************
+Step 5: Run Your Flower Project
+-------------------------------
 
-Specify the remote SuperExec IP addresses and the path to the root
-certificate in the ``[tool.flwr.federations.remote-superexec]`` table in
-the ``pyproject.toml`` file. Here, we have named our remote federation
-``remote-superexec``:
+Specify the remote SuperExec IP address and the path to the root certificate in the
+``[tool.flwr.federations.remote-superexec]`` table in the ``pyproject.toml`` file. Here,
+we have named our remote federation ``remote-superexec``:
 
-.. code:: toml
-   :caption: examples/quickstart-sklearn-tabular/pyproject.toml
+.. code-block:: toml
+    :caption: examples/quickstart-sklearn-tabular/pyproject.toml
 
-   [tool.flwr.federations.remote-superexec]
-   address = "192.168.2.33:9093"
-   root-certificates = "../../src/docker/distributed/superexec-certificates/ca.crt"
+    [tool.flwr.federations.remote-superexec]
+    address = "192.168.2.33:9093"
+    root-certificates = "../../src/docker/distributed/superexec-certificates/ca.crt"
 
 .. note::
 
-   The Path of the ``root-certificates`` should be relative to the
-   location of the ``pyproject.toml`` file.
+    The path of the ``root-certificates`` should be relative to the location of the
+    ``pyproject.toml`` file.
 
 To run the project, execute:
 
-.. code:: bash
+.. code-block:: bash
 
-   $ flwr run ../../../examples/quickstart-sklearn-tabular remote-superexec
+    $ flwr run ../../../examples/quickstart-sklearn-tabular remote-superexec
 
-That's it! With these steps, you've set up Flower on two separate
-machines and are ready to start using it.
+That's it! With these steps, you've set up Flower on two separate machines and are ready
+to start using it.
 
-******************
- Step 6: Clean Up
-******************
+Step 6: Clean Up
+----------------
 
 Shut down the Flower client components:
 
-.. code:: bash
+..
code-block:: bash - # In the `docker/distributed` directory - $ docker compose -f client/compose.yml down + # In the `docker/distributed` directory + $ docker compose -f client/compose.yml down Shut down the Flower server components and delete the SuperLink state: -.. code:: bash - - $ ssh - $ cd - $ docker compose -f server/compose.yml down -v +.. code-block:: bash -.. |quickstart_sklearn_tabular| replace:: + $ ssh + $ cd + $ docker compose -f server/compose.yml down -v - ``examples/quickstart-sklearn-tabular`` +.. |quickstart_sklearn_tabular| replace:: ``examples/quickstart-sklearn-tabular`` .. _quickstart_sklearn_tabular: https://github.com/adap/flower/tree/main/examples/quickstart-sklearn-tabular diff --git a/doc/source/docker/tutorial-quickstart-docker-compose.rst b/doc/source/docker/tutorial-quickstart-docker-compose.rst index 7aeae1e2fb6b..bff3125c1b16 100644 --- a/doc/source/docker/tutorial-quickstart-docker-compose.rst +++ b/doc/source/docker/tutorial-quickstart-docker-compose.rst @@ -2,11 +2,12 @@ Quickstart with Docker Compose ============================== This quickstart shows you how to set up Flower using Docker Compose in a single command, -allowing you to focus on developing your application without worrying about the underlying -infrastructure. +allowing you to focus on developing your application without worrying about the +underlying infrastructure. -You will also learn how to easily enable TLS encryption and persist application state locally, -giving you the freedom to choose the configuration that best suits your project's needs. +You will also learn how to easily enable TLS encryption and persist application state +locally, giving you the freedom to choose the configuration that best suits your +project's needs. Prerequisites ------------- @@ -20,55 +21,56 @@ Before you start, make sure that: Step 1: Set Up -------------- -#. Clone the Docker Compose ``complete`` directory: +1. Clone the Docker Compose ``complete`` directory: .. code-block:: bash - $ git clone --depth=1 https://github.com/adap/flower.git _tmp \ - && mv _tmp/src/docker/complete . \ - && rm -rf _tmp && cd complete + $ git clone --depth=1 https://github.com/adap/flower.git _tmp \ + && mv _tmp/src/docker/complete . \ + && rm -rf _tmp && cd complete -#. Create a new Flower project (PyTorch): +2. Create a new Flower project (PyTorch): .. code-block:: bash - $ flwr new quickstart-compose --framework PyTorch --username flower + $ flwr new quickstart-compose --framework PyTorch --username flower -#. Export the path of the newly created project. The path should be relative to the location of the - Docker Compose files: +3. Export the path of the newly created project. The path should be relative to the + location of the Docker Compose files: .. code-block:: bash - $ export PROJECT_DIR=quickstart-compose + $ export PROJECT_DIR=quickstart-compose - Setting the ``PROJECT_DIR`` helps Docker Compose locate the ``pyproject.toml`` file, allowing - it to install dependencies in the SuperExec and SuperNode images correctly. + Setting the ``PROJECT_DIR`` helps Docker Compose locate the ``pyproject.toml`` file, + allowing it to install dependencies in the SuperExec and SuperNode images correctly. Step 2: Run Flower in Insecure Mode ----------------------------------- -To begin, start Flower with the most basic configuration. In this setup, Flower -will run without TLS and without persisting the state. +To begin, start Flower with the most basic configuration. 
In this setup, Flower will run +without TLS and without persisting the state. .. note:: - Without TLS, the data sent between the services remains **unencrypted**. Use it only for development - purposes. + Without TLS, the data sent between the services remains **unencrypted**. Use it only + for development purposes. - For production-oriented use cases, :ref:`enable TLS` for secure data transmission. + For production-oriented use cases, :ref:`enable TLS` for secure data + transmission. Open your terminal and run: .. code-block:: bash - $ docker compose -f compose.yml up --build -d + $ docker compose -f compose.yml up --build -d .. dropdown:: Understand the command - * ``docker compose``: The Docker command to run the Docker Compose tool. - * ``-f compose.yml``: Specify the YAML file that contains the basic Flower service definitions. - * ``--build``: Rebuild the images for each service if they don't already exist. - * ``-d``: Detach the containers from the terminal and run them in the background. + * ``docker compose``: The Docker command to run the Docker Compose tool. + * ``-f compose.yml``: Specify the YAML file that contains the basic Flower service definitions. + * ``--build``: Rebuild the images for each service if they don't already exist. + * ``-d``: Detach the containers from the terminal and run them in the background. Step 3: Run the Quickstart Project ---------------------------------- @@ -76,316 +78,321 @@ Step 3: Run the Quickstart Project Now that the Flower services have been started via Docker Compose, it is time to run the quickstart example. -To ensure the ``flwr`` CLI connects to the SuperExec, you need to specify the SuperExec addresses -in the ``pyproject.toml`` file. +To ensure the ``flwr`` CLI connects to the SuperExec, you need to specify the SuperExec +addresses in the ``pyproject.toml`` file. -#. Add the following lines to the ``quickstart-compose/pyproject.toml``: +1. Add the following lines to the ``quickstart-compose/pyproject.toml``: .. code-block:: toml - :caption: quickstart-compose/pyproject.toml + :caption: quickstart-compose/pyproject.toml - [tool.flwr.federations.docker-compose] - address = "127.0.0.1:9093" - insecure = true + [tool.flwr.federations.docker-compose] + address = "127.0.0.1:9093" + insecure = true -#. Execute the command to run the quickstart example: +2. Execute the command to run the quickstart example: .. code-block:: bash - $ flwr run quickstart-compose docker-compose + $ flwr run quickstart-compose docker-compose -#. Monitor the SuperExec logs and wait for the summary to appear: +3. Monitor the SuperExec logs and wait for the summary to appear: .. code-block:: bash - $ docker compose logs superexec -f + $ docker compose logs superexec -f Step 4: Update the Application ------------------------------ In the next step, change the application code. -#. For example, go to the ``task.py`` file in the ``quickstart-compose/quickstart_compose/`` - directory and add a ``print`` call in the ``get_weights`` function: +1. For example, go to the ``task.py`` file in the + ``quickstart-compose/quickstart_compose/`` directory and add a ``print`` call in the + ``get_weights`` function: .. code-block:: python - :caption: quickstart-compose/quickstart_compose/task.py + :caption: quickstart-compose/quickstart_compose/task.py - # ... - def get_weights(net): - print("Get weights") - return [val.cpu().numpy() for _, val in net.state_dict().items()] - # ... + # ... 
+ def get_weights(net): + print("Get weights") + return [val.cpu().numpy() for _, val in net.state_dict().items()] -#. Rebuild and restart the services. + + # ... + +2. Rebuild and restart the services. .. note:: - If you have modified the dependencies listed in your ``pyproject.toml`` file, it is essential - to rebuild images. + If you have modified the dependencies listed in your ``pyproject.toml`` file, it + is essential to rebuild images. - If you haven't made any changes, you can skip this step. + If you haven't made any changes, you can skip this step. Run the following command to rebuild and restart the services: .. code-block:: bash - $ docker compose -f compose.yml up --build -d + $ docker compose -f compose.yml up --build -d -#. Run the updated quickstart example: +3. Run the updated quickstart example: .. code-block:: bash - $ flwr run quickstart-compose docker-compose - $ docker compose logs superexec -f + $ flwr run quickstart-compose docker-compose + $ docker compose logs superexec -f In the SuperExec logs, you should find the ``Get weights`` line: .. code-block:: - :emphasize-lines: 9 - - superexec-1 | INFO : Starting Flower SuperExec - superexec-1 | WARNING : Option `--insecure` was set. Starting insecure HTTP server. - superexec-1 | INFO : Starting Flower SuperExec gRPC server on 0.0.0.0:9093 - superexec-1 | INFO : ExecServicer.StartRun - superexec-1 | 🎊 Successfully installed quickstart-compose to /app/.flwr/apps/flower/quickstart-compose/1.0.0. - superexec-1 | INFO : Created run -6767165609169293507 - superexec-1 | INFO : Started run -6767165609169293507 - superexec-1 | WARNING : Option `--insecure` was set. Starting insecure HTTP client connected to superlink:9091. - superexec-1 | Get weights - superexec-1 | INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout + :emphasize-lines: 9 + + superexec-1 | INFO : Starting Flower SuperExec + superexec-1 | WARNING : Option `--insecure` was set. Starting insecure HTTP server. + superexec-1 | INFO : Starting Flower SuperExec gRPC server on 0.0.0.0:9093 + superexec-1 | INFO : ExecServicer.StartRun + superexec-1 | 🎊 Successfully installed quickstart-compose to /app/.flwr/apps/flower/quickstart-compose/1.0.0. + superexec-1 | INFO : Created run -6767165609169293507 + superexec-1 | INFO : Started run -6767165609169293507 + superexec-1 | WARNING : Option `--insecure` was set. Starting insecure HTTP client connected to superlink:9091. + superexec-1 | Get weights + superexec-1 | INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout Step 5: Persisting the SuperLink State -------------------------------------- -In this step, Flower services are configured to persist the state of the SuperLink service, -ensuring that it maintains its state even after a restart. +In this step, Flower services are configured to persist the state of the SuperLink +service, ensuring that it maintains its state even after a restart. .. note:: - When working with Docker Compose on Linux, you may need to create the ``state`` directory first - and change its ownership to ensure proper access and permissions. + When working with Docker Compose on Linux, you may need to create the ``state`` + directory first and change its ownership to ensure proper access and permissions. For more information, consult the following page: :doc:`persist-superlink-state`. -#. Run the command: +1. Run the command: .. 
code-block:: bash - $ docker compose -f compose.yml -f with-state.yml up --build -d + $ docker compose -f compose.yml -f with-state.yml up --build -d .. dropdown:: Understand the command - * ``docker compose``: The Docker command to run the Docker Compose tool. - * ``-f compose.yml``: Specify the YAML file that contains the basic Flower service definitions. - * | ``-f with-state.yml``: Specifies the path to an additional Docker Compose file that - | contains the configuration for persisting the SuperLink state. - | - | Docker merges Compose files according to `merging rules `_. - * ``--build``: Rebuild the images for each service if they don't already exist. - * ``-d``: Detach the containers from the terminal and run them in the background. + * ``docker compose``: The Docker command to run the Docker Compose tool. + * ``-f compose.yml``: Specify the YAML file that contains the basic Flower service definitions. + * | ``-f with-state.yml``: Specifies the path to an additional Docker Compose file that + | contains the configuration for persisting the SuperLink state. + | + | Docker merges Compose files according to `merging rules `_. + * ``--build``: Rebuild the images for each service if they don't already exist. + * ``-d``: Detach the containers from the terminal and run them in the background. -#. Rerun the ``quickstart-compose`` project: +2. Rerun the ``quickstart-compose`` project: .. code-block:: bash - $ flwr run quickstart-compose docker-compose + $ flwr run quickstart-compose docker-compose -#. Check the content of the ``state`` directory: +3. Check the content of the ``state`` directory: .. code-block:: bash - $ ls state/ - state.db + $ ls state/ + state.db - You should see a ``state.db`` file in the ``state`` directory. If you restart the service, the - state file will be used to restore the state from the previously saved data. This ensures that - the data persists even if the containers are stopped and started again. + You should see a ``state.db`` file in the ``state`` directory. If you restart the + service, the state file will be used to restore the state from the previously saved + data. This ensures that the data persists even if the containers are stopped and + started again. -.. _TLS: +.. _tls: Step 6: Run Flower with TLS --------------------------- -#. To demonstrate how to enable TLS, generate self-signed certificates using the ``certs.yml`` - Compose file. +1. To demonstrate how to enable TLS, generate self-signed certificates using the + ``certs.yml`` Compose file. .. important:: - These certificates should be used only for development purposes. + These certificates should be used only for development purposes. - For production environments, use a service like `Let's Encrypt `_ - to obtain your certificates. + For production environments, use a service like `Let's Encrypt + `_ to obtain your certificates. Run the command: .. code-block:: bash - $ docker compose -f certs.yml up --build + $ docker compose -f certs.yml up --build -#. Add the following lines to the ``quickstart-compose/pyproject.toml``: +2. Add the following lines to the ``quickstart-compose/pyproject.toml``: .. code-block:: toml - :caption: quickstart-compose/pyproject.toml + :caption: quickstart-compose/pyproject.toml - [tool.flwr.federations.docker-compose-tls] - address = "127.0.0.1:9093" - root-certificates = "../superexec-certificates/ca.crt" + [tool.flwr.federations.docker-compose-tls] + address = "127.0.0.1:9093" + root-certificates = "../superexec-certificates/ca.crt" -#. 
Restart the services with TLS enabled: +3. Restart the services with TLS enabled: .. code-block:: bash - $ docker compose -f compose.yml -f with-tls.yml up --build -d + $ docker compose -f compose.yml -f with-tls.yml up --build -d -#. Rerun the ``quickstart-compose`` project: +4. Rerun the ``quickstart-compose`` project: .. code-block:: bash - $ flwr run quickstart-compose docker-compose-tls - $ docker compose logs superexec -f + $ flwr run quickstart-compose docker-compose-tls + $ docker compose logs superexec -f Step 7: Add another SuperNode ----------------------------- -You can add more SuperNodes and ClientApps by duplicating their definitions in the ``compose.yml`` -file. +You can add more SuperNodes and ClientApps by duplicating their definitions in the +``compose.yml`` file. -Just give each new SuperNode and ClientApp service a unique service name like ``supernode-3``, -``clientapp-3``, etc. +Just give each new SuperNode and ClientApp service a unique service name like +``supernode-3``, ``clientapp-3``, etc. In ``compose.yml``, add the following: .. code-block:: yaml - :caption: compose.yml - :substitutions: - - # other service definitions - - supernode-3: - image: flwr/supernode:${FLWR_VERSION:-|stable_flwr_version|} - command: - - --insecure - - --superlink - - superlink:9092 - - --supernode-address - - 0.0.0.0:9096 - - --isolation - - process - - --node-config - - "partition-id=1 num-partitions=2" - depends_on: - - superlink - - clientapp-3: - build: - context: ${PROJECT_DIR:-.} - dockerfile_inline: | - FROM flwr/clientapp:${FLWR_VERSION:-|stable_flwr_version|} - - USER root - RUN apt-get update \ - && apt-get -y --no-install-recommends install \ - build-essential \ - && rm -rf /var/lib/apt/lists/* - USER app - - WORKDIR /app - COPY --chown=app:app pyproject.toml . - RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ - && python -m pip install -U --no-cache-dir . - - ENTRYPOINT ["flwr-clientapp"] - command: - - --supernode - - supernode-3:9096 - deploy: - resources: - limits: - cpus: "2" - stop_signal: SIGINT - depends_on: - - supernode-3 - -If you also want to enable TLS for the new SuperNodes, duplicate the SuperNode definition for -each new SuperNode service in the ``with-tls.yml`` file. + :caption: compose.yml + :substitutions: + + # other service definitions + + supernode-3: + image: flwr/supernode:${FLWR_VERSION:-|stable_flwr_version|} + command: + - --insecure + - --superlink + - superlink:9092 + - --supernode-address + - 0.0.0.0:9096 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" + depends_on: + - superlink + + clientapp-3: + build: + context: ${PROJECT_DIR:-.} + dockerfile_inline: | + FROM flwr/clientapp:${FLWR_VERSION:-|stable_flwr_version|} + + USER root + RUN apt-get update \ + && apt-get -y --no-install-recommends install \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + USER app + + WORKDIR /app + COPY --chown=app:app pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-3:9096 + deploy: + resources: + limits: + cpus: "2" + stop_signal: SIGINT + depends_on: + - supernode-3 + +If you also want to enable TLS for the new SuperNodes, duplicate the SuperNode +definition for each new SuperNode service in the ``with-tls.yml`` file. Make sure that the names of the services match with the one in the ``compose.yml`` file. In ``with-tls.yml``, add the following: .. 
code-block:: yaml - :caption: with-tls.yml - - # other service definitions - - supernode-3: - command: - - --superlink - - superlink:9092 - - --supernode-address - - 0.0.0.0:9096 - - --isolation - - process - - --node-config - - "partition-id=1 num-partitions=2" - - --root-certificates - - certificates/ca.crt - secrets: - - source: superlink-ca-certfile - target: /app/certificates/ca.crt + :caption: with-tls.yml + + # other service definitions + + supernode-3: + command: + - --superlink + - superlink:9092 + - --supernode-address + - 0.0.0.0:9096 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" + - --root-certificates + - certificates/ca.crt + secrets: + - source: superlink-ca-certfile + target: /app/certificates/ca.crt Step 8: Persisting the SuperLink State and Enabling TLS ------------------------------------------------------- -To run Flower with persisted SuperLink state and enabled TLS, a slight change in the ``with-state.yml`` -file is required: +To run Flower with persisted SuperLink state and enabled TLS, a slight change in the +``with-state.yml`` file is required: -#. Comment out the lines 2-4 and uncomment the lines 5-9: +1. Comment out the lines 2-4 and uncomment the lines 5-9: .. code-block:: yaml - :caption: with-state.yml - :linenos: - :emphasize-lines: 2-9 - - superlink: - # command: - # - --insecure - # - --database=state/state.db - command: - - --ssl-ca-certfile=certificates/ca.crt - - --ssl-certfile=certificates/server.pem - - --ssl-keyfile=certificates/server.key - - --database=state/state.db - volumes: - - ./state/:/app/state/:rw - -#. Restart the services: + :caption: with-state.yml + :linenos: + :emphasize-lines: 2-9 + + superlink: + # command: + # - --insecure + # - --database=state/state.db + command: + - --ssl-ca-certfile=certificates/ca.crt + - --ssl-certfile=certificates/server.pem + - --ssl-keyfile=certificates/server.key + - --database=state/state.db + volumes: + - ./state/:/app/state/:rw + +2. Restart the services: .. code-block:: bash - $ docker compose -f compose.yml -f with-tls.yml -f with-state.yml up --build -d + $ docker compose -f compose.yml -f with-tls.yml -f with-state.yml up --build -d -#. Rerun the ``quickstart-compose`` project: +3. Rerun the ``quickstart-compose`` project: .. code-block:: bash - $ flwr run quickstart-compose docker-compose-tls - $ docker compose logs superexec -f + $ flwr run quickstart-compose docker-compose-tls + $ docker compose logs superexec -f Step 9: Merge Multiple Compose Files ------------------------------------ -You can merge multiple Compose files into a single file. For instance, if you wish to combine -the basic configuration with the TLS configuration, execute the following command: +You can merge multiple Compose files into a single file. For instance, if you wish to +combine the basic configuration with the TLS configuration, execute the following +command: .. code-block:: bash - $ docker compose -f compose.yml \ - -f with-tls.yml config --no-path-resolution > my_compose.yml + $ docker compose -f compose.yml \ + -f with-tls.yml config --no-path-resolution > my_compose.yml -This will merge the contents of ``compose.yml`` and ``with-tls.yml`` into a new file called -``my_compose.yml``. +This will merge the contents of ``compose.yml`` and ``with-tls.yml`` into a new file +called ``my_compose.yml``. Step 10: Clean Up ----------------- @@ -394,10 +401,10 @@ Remove all services and volumes: .. 
code-block:: bash
 
-   $ docker compose down -v
-   $ docker compose -f certs.yml down -v
+    $ docker compose down -v
+    $ docker compose -f certs.yml down -v
 
 Where to Go Next
 ----------------
 
-* :doc:`run-quickstart-examples-docker-compose`
+- :doc:`run-quickstart-examples-docker-compose`
diff --git a/doc/source/docker/tutorial-quickstart-docker.rst b/doc/source/docker/tutorial-quickstart-docker.rst
index 189d019cb097..993754dcf109 100644
--- a/doc/source/docker/tutorial-quickstart-docker.rst
+++ b/doc/source/docker/tutorial-quickstart-docker.rst
@@ -1,11 +1,11 @@
 Quickstart with Docker
 ======================
 
-This quickstart aims to guide you through the process of containerizing a Flower project and
-running it end to end using Docker on your local machine.
+This quickstart aims to guide you through the process of containerizing a Flower project
+and running it end to end using Docker on your local machine.
 
-This tutorial does not use production-ready settings, so you can focus on understanding the basic
-workflow that uses the minimum configurations.
+This tutorial does not use production-ready settings, so you can focus on understanding
+the basic workflow that uses the minimum configurations.
 
 Prerequisites
 -------------
@@ -18,33 +18,33 @@ Before you start, make sure that:
 
 Step 1: Set Up
 --------------
 
-#. Create a new Flower project (PyTorch):
+1. Create a new Flower project (PyTorch):
 
    .. code-block:: bash
 
-      $ flwr new quickstart-docker --framework PyTorch --username flower
+       $ flwr new quickstart-docker --framework PyTorch --username flower
 
-      🔨 Creating Flower project quickstart-docker...
-      🎊 Project creation successful.
+       🔨 Creating Flower project quickstart-docker...
+       🎊 Project creation successful.
 
-      Use the following command to run your project:
+       Use the following command to run your project:
 
-      cd quickstart-docker
-      pip install -e .
-      flwr run
+       cd quickstart-docker
+       pip install -e .
+       flwr run
 
-      $ cd quickstart-docker
-      $ pip install -e .
+       $ cd quickstart-docker
+       $ pip install -e .
 
-#. Create a new Docker bridge network called ``flwr-network``:
+2. Create a new Docker bridge network called ``flwr-network``:
 
    .. code-block:: bash
 
-      $ docker network create --driver bridge flwr-network
+       $ docker network create --driver bridge flwr-network
 
-   User-defined networks, such as ``flwr-network``, enable IP resolution of container names, a feature
-   absent in the default bridge network. This simplifies quickstart example by avoiding the need to
-   determine host IP first.
+   User-defined networks, such as ``flwr-network``, enable IP resolution of container
+   names, a feature absent in the default bridge network. This simplifies the quickstart
+   example by avoiding the need to determine the host IP first.
 
 Step 2: Start the SuperLink
 ---------------------------
 
 Open your terminal and run:
 
 .. code-block:: bash
-   :substitutions:
+    :substitutions:
 
-   $ docker run --rm \
-       -p 9091:9091 -p 9092:9092 \
-       --network flwr-network \
-       --name superlink \
-       --detach \
-       flwr/superlink:|stable_flwr_version| --insecure
+    $ docker run --rm \
+        -p 9091:9091 -p 9092:9092 \
+        --network flwr-network \
+        --name superlink \
+        --detach \
+        flwr/superlink:|stable_flwr_version| --insecure
 
 .. dropdown:: Understand the command
 
-    * ``docker run``: This tells Docker to run a container from an image.
-    * ``--rm``: Remove the container once it is stopped or the command exits.
- * | ``-p 9091:9091 -p 9092:9092``: Map port ``9091`` and ``9092`` of the container to the same port of - | the host machine, allowing other services to access the Driver API on - | ``http://localhost:9091`` and the Fleet API on ``http://localhost:9092``. - * ``--network flwr-network``: Make the container join the network named ``flwr-network``. - * ``--name superlink``: Assign the name ``superlink`` to the container. - * ``--detach``: Run the container in the background, freeing up the terminal. - * | :substitution-code:`flwr/superlink:|stable_flwr_version|`: The name of the image to be run and the specific - | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a :doc:`specific version ` of the image. - * | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing - | unencrypted communication. + * ``docker run``: This tells Docker to run a container from an image. + * ``--rm``: Remove the container once it is stopped or the command exits. + * | ``-p 9091:9091 -p 9092:9092``: Map port ``9091`` and ``9092`` of the container to the same port of + | the host machine, allowing other services to access the Driver API on + | ``http://localhost:9091`` and the Fleet API on ``http://localhost:9092``. + * ``--network flwr-network``: Make the container join the network named ``flwr-network``. + * ``--name superlink``: Assign the name ``superlink`` to the container. + * ``--detach``: Run the container in the background, freeing up the terminal. + * | :substitution-code:`flwr/superlink:|stable_flwr_version|`: The name of the image to be run and the specific + | tag of the image. The tag :substitution-code:`|stable_flwr_version|` represents a :doc:`specific version ` of the image. + * | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing + | unencrypted communication. Step 3: Start the SuperNode --------------------------- Start two SuperNode containers. -#. Start the first container: +1. Start the first container: .. code-block:: bash - :substitutions: - - $ docker run --rm \ - -p 9094:9094 \ - --network flwr-network \ - --name supernode-1 \ - --detach \ - flwr/supernode:|stable_flwr_version| \ - --insecure \ - --superlink superlink:9092 \ - --node-config "partition-id=0 num-partitions=2" \ - --supernode-address 0.0.0.0:9094 \ - --isolation process + :substitutions: + + $ docker run --rm \ + -p 9094:9094 \ + --network flwr-network \ + --name supernode-1 \ + --detach \ + flwr/supernode:|stable_flwr_version| \ + --insecure \ + --superlink superlink:9092 \ + --node-config "partition-id=0 num-partitions=2" \ + --supernode-address 0.0.0.0:9094 \ + --isolation process .. dropdown:: Understand the command - * ``docker run``: This tells Docker to run a container from an image. - * ``--rm``: Remove the container once it is stopped or the command exits. - * | ``-p 9094:9094``: Map port ``9094`` of the container to the same port of - | the host machine, allowing other services to access the SuperNode API on - | ``http://localhost:9094``. - * ``--network flwr-network``: Make the container join the network named ``flwr-network``. - * ``--name supernode-1``: Assign the name ``supernode-1`` to the container. - * ``--detach``: Run the container in the background, freeing up the terminal. - * | ``flwr/supernode:|stable_flwr_version|``: This is the name of the image to be run and the specific tag - | of the image. 
* | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing
-      | unencrypted communication.
-    * | ``--superlink superlink:9092``: Connect to the SuperLink's Fleet API at the address
-      | ``superlink:9092``.
-    * | ``--node-config "partition-id=0 num-partitions=2"``: Set the partition ID to ``0`` and the
-      | number of partitions to ``2`` for the SuperNode configuration.
-    * | ``--supernode-address 0.0.0.0:9094``: Set the address and port number that the SuperNode
-      | is listening on.
-    * | ``--isolation process``: Tells the SuperNode that the ClientApp is created by separate
-      | independent process. The SuperNode does not attempt to create it.
-
-#. Start the second container:
+        * ``docker run``: This tells Docker to run a container from an image.
+        * ``--rm``: Remove the container once it is stopped or the command exits.
+        * | ``-p 9094:9094``: Map port ``9094`` of the container to the same port of
+          | the host machine, allowing other services to access the SuperNode API on
+          | ``http://localhost:9094``.
+        * ``--network flwr-network``: Make the container join the network named ``flwr-network``.
+        * ``--name supernode-1``: Assign the name ``supernode-1`` to the container.
+        * ``--detach``: Run the container in the background, freeing up the terminal.
+        * | ``flwr/supernode:|stable_flwr_version|``: This is the name of the image to be run and the specific tag
+          | of the image.
+        * | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing
+          | unencrypted communication.
+        * | ``--superlink superlink:9092``: Connect to the SuperLink's Fleet API at the address
+          | ``superlink:9092``.
+        * | ``--node-config "partition-id=0 num-partitions=2"``: Set the partition ID to ``0`` and the
+          | number of partitions to ``2`` for the SuperNode configuration.
+        * | ``--supernode-address 0.0.0.0:9094``: Set the address and port number that the SuperNode
+          | is listening on.
+        * | ``--isolation process``: Tells the SuperNode that the ClientApp is created by a separate,
+          | independent process. The SuperNode does not attempt to create it.
+
+2. Start the second container:
 
    .. code-block:: shell
-      :substitutions:
-
-      $ docker run --rm \
-          -p 9095:9095 \
-          --network flwr-network \
-          --name supernode-2 \
-          --detach \
-          flwr/supernode:|stable_flwr_version| \
-          --insecure \
-          --superlink superlink:9092 \
-          --node-config "partition-id=1 num-partitions=2" \
-          --supernode-address 0.0.0.0:9095 \
-          --isolation process
+       :substitutions:
+
+       $ docker run --rm \
+           -p 9095:9095 \
+           --network flwr-network \
+           --name supernode-2 \
+           --detach \
+           flwr/supernode:|stable_flwr_version| \
+           --insecure \
+           --superlink superlink:9092 \
+           --node-config "partition-id=1 num-partitions=2" \
+           --supernode-address 0.0.0.0:9095 \
+           --isolation process
 
 Step 4: Start the ClientApp
 ---------------------------
 
-The ClientApp Docker image comes with a pre-installed version of Flower and serves as a base for
-building your own ClientApp image. In order to install the FAB dependencies, you will need to create
-a Dockerfile that extends the ClientApp image and installs the required dependencies.
+The ClientApp Docker image comes with a pre-installed version of Flower and serves as a
+base for building your own ClientApp image. In order to install the FAB dependencies,
+you will need to create a Dockerfile that extends the ClientApp image and installs the
+required dependencies.
 
-#. Create a ClientApp Dockerfile called ``Dockerfile.clientapp`` and paste the following code into it:
+1.
Create a ClientApp Dockerfile called ``Dockerfile.clientapp`` and paste the following + code into it: .. code-block:: dockerfile - :caption: Dockerfile.clientapp - :linenos: - :substitutions: + :caption: Dockerfile.clientapp + :linenos: + :substitutions: - FROM flwr/clientapp:|stable_flwr_version| + FROM flwr/clientapp:|stable_flwr_version| - WORKDIR /app - COPY pyproject.toml . - RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ - && python -m pip install -U --no-cache-dir . + WORKDIR /app + COPY pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . - ENTRYPOINT ["flwr-clientapp"] + ENTRYPOINT ["flwr-clientapp"] .. dropdown:: Understand the Dockerfile - * | :substitution-code:`FROM flwr/clientapp:|stable_flwr_version|`: This line specifies that the Docker image - | to be built from is the ``flwr/clientapp image``, version :substitution-code:`|stable_flwr_version|`. - * | ``WORKDIR /app``: Set the working directory for the container to ``/app``. - | Any subsequent commands that reference a directory will be relative to this directory. - * | ``COPY pyproject.toml .``: Copy the ``pyproject.toml`` file - | from the current working directory into the container's ``/app`` directory. - * | ``RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml``: Remove the ``flwr`` dependency - | from the ``pyproject.toml``. - * | ``python -m pip install -U --no-cache-dir .``: Run the ``pip`` install command to - | install the dependencies defined in the ``pyproject.toml`` file - | - | The ``-U`` flag indicates that any existing packages should be upgraded, and - | ``--no-cache-dir`` prevents pip from using the cache to speed up the installation. - * | ``ENTRYPOINT ["flwr-clientapp"]``: Set the command ``flwr-clientapp`` to be - | the default command run when the container is started. + * | :substitution-code:`FROM flwr/clientapp:|stable_flwr_version|`: This line specifies that the Docker image + | to be built from is the ``flwr/clientapp image``, version :substitution-code:`|stable_flwr_version|`. + * | ``WORKDIR /app``: Set the working directory for the container to ``/app``. + | Any subsequent commands that reference a directory will be relative to this directory. + * | ``COPY pyproject.toml .``: Copy the ``pyproject.toml`` file + | from the current working directory into the container's ``/app`` directory. + * | ``RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml``: Remove the ``flwr`` dependency + | from the ``pyproject.toml``. + * | ``python -m pip install -U --no-cache-dir .``: Run the ``pip`` install command to + | install the dependencies defined in the ``pyproject.toml`` file + | + | The ``-U`` flag indicates that any existing packages should be upgraded, and + | ``--no-cache-dir`` prevents pip from using the cache to speed up the installation. + * | ``ENTRYPOINT ["flwr-clientapp"]``: Set the command ``flwr-clientapp`` to be + | the default command run when the container is started. .. important:: - Note that `flwr `__ is already installed in the ``flwr/clientapp`` - base image, so only other package dependencies such as ``flwr-datasets``, ``torch``, etc., - need to be installed. As a result, the ``flwr`` dependency is removed from the - ``pyproject.toml`` after it has been copied into the Docker image (see line 5). + Note that `flwr `__ is already installed in the + ``flwr/clientapp`` base image, so only other package dependencies such as + ``flwr-datasets``, ``torch``, etc., need to be installed. 
As a result, the + ``flwr`` dependency is removed from the ``pyproject.toml`` after it has been + copied into the Docker image (see line 5). -#. Next, build the ClientApp Docker image by running the following command in the directory where - the Dockerfile is located: +2. Next, build the ClientApp Docker image by running the following command in the + directory where the Dockerfile is located: .. code-block:: bash - $ docker build -f Dockerfile.clientapp -t flwr_clientapp:0.0.1 . + $ docker build -f Dockerfile.clientapp -t flwr_clientapp:0.0.1 . .. note:: - The image name was set as ``flwr_clientapp`` with the tag ``0.0.1``. Remember that - these values are merely examples, and you can customize them according to your requirements. + The image name was set as ``flwr_clientapp`` with the tag ``0.0.1``. Remember + that these values are merely examples, and you can customize them according to + your requirements. -#. Start the first ClientApp container: +3. Start the first ClientApp container: .. code-block:: bash - $ docker run --rm \ - --network flwr-network \ - --detach \ - flwr_clientapp:0.0.1 \ - --supernode supernode-1:9094 + $ docker run --rm \ + --network flwr-network \ + --detach \ + flwr_clientapp:0.0.1 \ + --supernode supernode-1:9094 .. dropdown:: Understand the command - * ``docker run``: This tells Docker to run a container from an image. - * ``--rm``: Remove the container once it is stopped or the command exits. - * ``--network flwr-network``: Make the container join the network named ``flwr-network``. - * ``--detach``: Run the container in the background, freeing up the terminal. - * | ``flwr_clientapp:0.0.1``: This is the name of the image to be run and the specific tag - | of the image. - * | ``--supernode supernode-1:9094``: Connect to the SuperNode's Fleet API at the address - | ``supernode-1:9094``. + * ``docker run``: This tells Docker to run a container from an image. + * ``--rm``: Remove the container once it is stopped or the command exits. + * ``--network flwr-network``: Make the container join the network named ``flwr-network``. + * ``--detach``: Run the container in the background, freeing up the terminal. + * | ``flwr_clientapp:0.0.1``: This is the name of the image to be run and the specific tag + | of the image. + * | ``--supernode supernode-1:9094``: Connect to the SuperNode's Fleet API at the address + | ``supernode-1:9094``. -#. Start the second ClientApp container: +4. Start the second ClientApp container: .. code-block:: shell - $ docker run --rm \ - --network flwr-network \ - --detach \ - flwr_clientapp:0.0.1 \ - --supernode supernode-2:9095 + $ docker run --rm \ + --network flwr-network \ + --detach \ + flwr_clientapp:0.0.1 \ + --supernode supernode-2:9095 Step 5: Start the SuperExec --------------------------- -The procedure for building and running a SuperExec image is almost identical to the ClientApp image. +The procedure for building and running a SuperExec image is almost identical to the +ClientApp image. -Similar to the ClientApp image, you will need to create a Dockerfile that extends the SuperExec -image and installs the required FAB dependencies. +Similar to the ClientApp image, you will need to create a Dockerfile that extends the +SuperExec image and installs the required FAB dependencies. -#. Create a SuperExec Dockerfile called ``Dockerfile.superexec`` and paste the following code in: +1. Create a SuperExec Dockerfile called ``Dockerfile.superexec`` and paste the following + code in: .. 
code-block:: dockerfile
-       :caption: Dockerfile.superexec
-       :substitutions:
+        :caption: Dockerfile.superexec
+        :substitutions:

-       FROM flwr/superexec:|stable_flwr_version|
+        FROM flwr/superexec:|stable_flwr_version|

-       WORKDIR /app
+        WORKDIR /app

-       COPY pyproject.toml .
-       RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \
-           && python -m pip install -U --no-cache-dir .
+        COPY pyproject.toml .
+        RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \
+            && python -m pip install -U --no-cache-dir .

-       ENTRYPOINT ["flower-superexec", "--executor", "flwr.superexec.deployment:executor"]
+        ENTRYPOINT ["flower-superexec", "--executor", "flwr.superexec.deployment:executor"]

    .. dropdown:: Understand the Dockerfile

-       * | :substitution-code:`FROM flwr/superexec:|stable_flwr_version|`: This line specifies that the Docker image
-         | to be built from is the ``flwr/superexec image``, version :substitution-code:`|stable_flwr_version|`.
-       * | ``WORKDIR /app``: Set the working directory for the container to ``/app``.
-         | Any subsequent commands that reference a directory will be relative to this directory.
-       * | ``COPY pyproject.toml .``: Copy the ``pyproject.toml`` file
-         | from the current working directory into the container's ``/app`` directory.
-       * | ``RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml``: Remove the ``flwr`` dependency
-         | from the ``pyproject.toml``.
-       * | ``python -m pip install -U --no-cache-dir .``: Run the ``pip`` install command to
-         | install the dependencies defined in the ``pyproject.toml`` file
-         |
-         | The ``-U`` flag indicates that any existing packages should be upgraded, and
-         | ``--no-cache-dir`` prevents pip from using the cache to speed up the installation.
-       * | ``ENTRYPOINT ["flower-superexec"``: Set the command ``flower-superexec`` to be
-         | the default command run when the container is started.
-         |
-         | ``"--executor", "flwr.superexec.deployment:executor"]`` Use the
-         | ``flwr.superexec.deployment:executor`` executor to run the ServerApps.
+        * | :substitution-code:`FROM flwr/superexec:|stable_flwr_version|`: This line specifies that the Docker image
+          | to be built from is the ``flwr/superexec`` image, version :substitution-code:`|stable_flwr_version|`.
+        * | ``WORKDIR /app``: Set the working directory for the container to ``/app``.
+          | Any subsequent commands that reference a directory will be relative to this directory.
+        * | ``COPY pyproject.toml .``: Copy the ``pyproject.toml`` file
+          | from the current working directory into the container's ``/app`` directory.
+        * | ``RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml``: Remove the ``flwr`` dependency
+          | from the ``pyproject.toml``.
+        * | ``python -m pip install -U --no-cache-dir .``: Run the ``pip`` install command to
+          | install the dependencies defined in the ``pyproject.toml`` file.
+          |
+          | The ``-U`` flag indicates that any existing packages should be upgraded, and
+          | ``--no-cache-dir`` prevents pip from using the cache to speed up the installation.
+        * | ``ENTRYPOINT ["flower-superexec"``: Set the command ``flower-superexec`` to be
+          | the default command run when the container is started.
+          |
+          | ``"--executor", "flwr.superexec.deployment:executor"]``: Use the
+          | ``flwr.superexec.deployment:executor`` executor to run the ServerApps.

-#. Afterward, in the directory that holds the Dockerfile, execute this Docker command to
+2. Afterward, in the directory that holds the Dockerfile, execute this Docker command to
    build the SuperExec image:

    ..
code-block:: bash - $ docker build -f Dockerfile.superexec -t flwr_superexec:0.0.1 . + $ docker build -f Dockerfile.superexec -t flwr_superexec:0.0.1 . - -#. Start the SuperExec container: +3. Start the SuperExec container: .. code-block:: bash - $ docker run --rm \ - -p 9093:9093 \ - --network flwr-network \ - --name superexec \ - --detach \ - flwr_superexec:0.0.1 \ - --insecure \ - --executor-config superlink=\"superlink:9091\" + $ docker run --rm \ + -p 9093:9093 \ + --network flwr-network \ + --name superexec \ + --detach \ + flwr_superexec:0.0.1 \ + --insecure \ + --executor-config superlink=\"superlink:9091\" .. dropdown:: Understand the command - * ``docker run``: This tells Docker to run a container from an image. - * ``--rm``: Remove the container once it is stopped or the command exits. - * | ``-p 9093:9093``: Map port ``9093`` of the container to the same port of - | the host machine, allowing you to access the SuperExec API on ``http://localhost:9093``. - * ``--network flwr-network``: Make the container join the network named ``flwr-network``. - * ``--name superexec``: Assign the name ``superexec`` to the container. - * ``--detach``: Run the container in the background, freeing up the terminal. - * | ``flwr_superexec:0.0.1``: This is the name of the image to be run and the specific tag - | of the image. - * | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing - | unencrypted communication. - * | ``--executor-config superlink=\"superlink:9091\"``: Configure the SuperExec executor to - | connect to the SuperLink running on port ``9091``. + * ``docker run``: This tells Docker to run a container from an image. + * ``--rm``: Remove the container once it is stopped or the command exits. + * | ``-p 9093:9093``: Map port ``9093`` of the container to the same port of + | the host machine, allowing you to access the SuperExec API on ``http://localhost:9093``. + * ``--network flwr-network``: Make the container join the network named ``flwr-network``. + * ``--name superexec``: Assign the name ``superexec`` to the container. + * ``--detach``: Run the container in the background, freeing up the terminal. + * | ``flwr_superexec:0.0.1``: This is the name of the image to be run and the specific tag + | of the image. + * | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing + | unencrypted communication. + * | ``--executor-config superlink=\"superlink:9091\"``: Configure the SuperExec executor to + | connect to the SuperLink running on port ``9091``. Step 6: Run the Quickstart Project ---------------------------------- -#. Add the following lines to the ``pyproject.toml``: +1. Add the following lines to the ``pyproject.toml``: .. code-block:: toml - :caption: pyproject.toml + :caption: pyproject.toml - [tool.flwr.federations.docker] - address = "127.0.0.1:9093" - insecure = true + [tool.flwr.federations.docker] + address = "127.0.0.1:9093" + insecure = true -#. Run the ``quickstart-docker`` project by executing the command: +2. Run the ``quickstart-docker`` project by executing the command: .. code-block:: bash - $ flwr run . docker + $ flwr run . docker -#. Follow the SuperExec logs to track the execution of the run: +3. Follow the SuperExec logs to track the execution of the run: .. code-block:: bash - $ docker logs -f superexec + $ docker logs -f superexec Step 7: Update the Application ------------------------------ -#. Change the application code. 
For example, change the ``seed`` in ``quickstart_docker/task.py`` - to ``43`` and save it: +1. Change the application code. For example, change the ``seed`` in + ``quickstart_docker/task.py`` to ``43`` and save it: .. code-block:: python - :caption: quickstart_docker/task.py + :caption: quickstart_docker/task.py - # ... - partition_train_test = partition.train_test_split(test_size=0.2, seed=43) - # ... + # ... + partition_train_test = partition.train_test_split(test_size=0.2, seed=43) + # ... -#. Stop the current ClientApp containers: +2. Stop the current ClientApp containers: .. code-block:: bash - $ docker stop $(docker ps -a -q --filter ancestor=flwr_clientapp:0.0.1) + $ docker stop $(docker ps -a -q --filter ancestor=flwr_clientapp:0.0.1) -#. Rebuild the FAB and ClientApp image: +3. Rebuild the FAB and ClientApp image: .. code-block:: bash - $ docker build -f Dockerfile.clientapp -t flwr_clientapp:0.0.1 . + $ docker build -f Dockerfile.clientapp -t flwr_clientapp:0.0.1 . -#. Launch two new ClientApp containers based on the newly built image: +4. Launch two new ClientApp containers based on the newly built image: .. code-block:: bash - $ docker run --rm \ - --network flwr-network \ - --detach \ - flwr_clientapp:0.0.1 \ - --supernode supernode-1:9094 - $ docker run --rm \ - --network flwr-network \ - --detach \ - flwr_clientapp:0.0.1 \ - --supernode supernode-2:9095 + $ docker run --rm \ + --network flwr-network \ + --detach \ + flwr_clientapp:0.0.1 \ + --supernode supernode-1:9094 + $ docker run --rm \ + --network flwr-network \ + --detach \ + flwr_clientapp:0.0.1 \ + --supernode supernode-2:9095 -#. Run the updated project: +5. Run the updated project: .. code-block:: bash - $ flwr run . docker + $ flwr run . docker Step 8: Clean Up ---------------- @@ -388,16 +393,16 @@ Remove the containers and the bridge network: .. code-block:: bash - $ docker stop $(docker ps -a -q --filter ancestor=flwr_clientapp:0.0.1) \ - supernode-1 \ - supernode-2 \ - superexec \ - superlink - $ docker network rm flwr-network + $ docker stop $(docker ps -a -q --filter ancestor=flwr_clientapp:0.0.1) \ + supernode-1 \ + supernode-2 \ + superexec \ + superlink + $ docker network rm flwr-network Where to Go Next ---------------- -* :doc:`enable-tls` -* :doc:`persist-superlink-state` -* :doc:`tutorial-quickstart-docker-compose` +- :doc:`enable-tls` +- :doc:`persist-superlink-state` +- :doc:`tutorial-quickstart-docker-compose` diff --git a/doc/source/docker/use-a-different-version.rst b/doc/source/docker/use-a-different-version.rst index 73e5f4218663..9108f5157dcd 100644 --- a/doc/source/docker/use-a-different-version.rst +++ b/doc/source/docker/use-a-different-version.rst @@ -1,12 +1,13 @@ Use a Different Flower Version ============================== -If you want to use a different version of Flower, for example Flower nightly, you can do so by -changing the tag. All available versions are on `Docker Hub `__. +If you want to use a different version of Flower, for example Flower nightly, you can do +so by changing the tag. All available versions are on `Docker Hub +`__. .. important:: - When using Flower nightly, the SuperLink nightly image must be paired with the corresponding - SuperNode and ServerApp nightly images released on the same day. To ensure the versions are - in sync, using the concrete tag, e.g., ``1.10.0.dev20240610`` instead of ``nightly`` is - recommended. 
+    When using Flower nightly, the SuperLink nightly image must be paired with the
+    corresponding SuperNode and ServerApp nightly images released on the same day. To
+    ensure the versions are in sync, using the concrete tag, e.g.,
+    ``1.10.0.dev20240610`` instead of ``nightly``, is recommended.
diff --git a/doc/source/example-fedbn-pytorch-from-centralized-to-federated.rst b/doc/source/example-fedbn-pytorch-from-centralized-to-federated.rst
index 0139f3b8dc31..4a9d4607d9a5 100644
--- a/doc/source/example-fedbn-pytorch-from-centralized-to-federated.rst
+++ b/doc/source/example-fedbn-pytorch-from-centralized-to-federated.rst
@@ -1,16 +1,22 @@
 Example: FedBN in PyTorch - From Centralized To Federated
 =========================================================

-This tutorial will show you how to use Flower to build a federated version of an existing machine learning workload with `FedBN `_, a federated training strategy designed for non-iid data.
-We are using PyTorch to train a Convolutional Neural Network(with Batch Normalization layers) on the CIFAR-10 dataset.
-When applying FedBN, only few changes needed compared to :doc:`Example: PyTorch - From Centralized To Federated `.
+This tutorial will show you how to use Flower to build a federated version of an
+existing machine learning workload with `FedBN `_, a
+federated training strategy designed for non-iid data. We are using PyTorch to train a
+Convolutional Neural Network (with Batch Normalization layers) on the CIFAR-10 dataset.
+When applying FedBN, only a few changes are needed compared to :doc:`Example: PyTorch - From
+Centralized To Federated <example-pytorch-from-centralized-to-federated>`.

 Centralized Training
 --------------------

-All files are revised based on :doc:`Example: PyTorch - From Centralized To Federated `.
-The only thing to do is modifying the file called :code:`cifar.py`, revised part is shown below:
-The model architecture defined in class Net() is added with Batch Normalization layers accordingly.
+All files are revised based on :doc:`Example: PyTorch - From Centralized To Federated
+<example-pytorch-from-centralized-to-federated>`. The only thing to do is to modify the
+file called ``cifar.py``; the revised part is shown below:
+
+The model architecture defined in class ``Net()`` is extended with Batch Normalization
+layers accordingly.

 .. code-block:: python

@@ -40,26 +46,33 @@ The model architecture defined in class Net() is added with Batch Normalization

 You can now run your machine learning workload:

-.. code-block:: python
+.. code-block:: bash

     python3 cifar.py

-So far this should all look fairly familiar if you've used PyTorch before.
-Let's take the next step and use what we've built to create a federated learning system within FedBN, the system consists of one server and two clients.
+So far this should all look fairly familiar if you've used PyTorch before. Let's take
+the next step and use what we've built to create a federated learning system with
+FedBN; the system consists of one server and two clients.

 Federated Training
 ------------------

-If you have read :doc:`Example: PyTorch - From Centralized To Federated `, the following parts are easy to follow, only :code:`get_parameters` and :code:`set_parameters` function in :code:`client.py` needed to revise.
-If not, please read the :doc:`Example: PyTorch - From Centralized To Federated `. first.
+If you have read :doc:`Example: PyTorch - From Centralized To Federated
+<example-pytorch-from-centralized-to-federated>`, the following parts are easy to
+follow; only the ``get_parameters`` and ``set_parameters`` functions in ``client.py`` need
+to be revised.
If not, please read the :doc:`Example: PyTorch - From Centralized To
+Federated <example-pytorch-from-centralized-to-federated>` first.

-Our example consists of one *server* and two *clients*. In FedBN, :code:`server.py` keeps unchanged, we can start the server directly.
+Our example consists of one *server* and two *clients*. In FedBN, ``server.py`` remains
+unchanged, and we can start the server directly.

-.. code-block:: python
+.. code-block:: bash

     python3 server.py

-Finally, we will revise our *client* logic by changing :code:`get_parameters` and :code:`set_parameters` in :code:`client.py`, we will exclude batch normalization parameters from model parameter list when sending to or receiving from the server.
+Finally, we will revise our *client* logic by changing ``get_parameters`` and
+``set_parameters`` in ``client.py``; we will exclude batch normalization parameters from
+the model parameter list when sending to or receiving from the server.

 .. code-block:: python

@@ -71,11 +84,15 @@ Finally, we will revise our *client* logic by changing :code:`get_parameters` an

         def get_parameters(self, config) -> List[np.ndarray]:
             # Return model parameters as a list of NumPy ndarrays, excluding parameters of BN layers when using FedBN
-            return [val.cpu().numpy() for name, val in self.model.state_dict().items() if 'bn' not in name]
+            return [
+                val.cpu().numpy()
+                for name, val in self.model.state_dict().items()
+                if "bn" not in name
+            ]

         def set_parameters(self, parameters: List[np.ndarray]) -> None:
             # Set model parameters from a list of NumPy ndarrays
-            keys = [k for k in self.model.state_dict().keys() if 'bn' not in k]
+            keys = [k for k in self.model.state_dict().keys() if "bn" not in k]
             params_dict = zip(keys, parameters)
             state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
             self.model.load_state_dict(state_dict, strict=False)

@@ -84,15 +101,20 @@ Finally, we will revise our *client* logic by changing :code:`get_parameters` an

 You can now open two additional terminal windows and run

-.. code-block:: python
+.. code-block:: bash

     python3 client.py

-in each window (make sure that the server is still running before you do so) and see your (previously centralized) PyTorch project run federated learning with FedBN strategy across two clients. Congratulations!
+in each window (make sure that the server is still running before you do so) and see
+your (previously centralized) PyTorch project run federated learning with the FedBN
+strategy across two clients. Congratulations!

 Next Steps
 ----------

-The full source code for this example can be found `here `_.
-Our example is of course somewhat over-simplified because both clients load the exact same dataset, which isn't realistic.
-You're now prepared to explore this topic further. How about using different subsets of CIFAR-10 on each client? How about adding more clients?
+The full source code for this example can be found `here
+`_.
+Our example is, of course, somewhat over-simplified because both clients load the exact
+same dataset, which isn't realistic. You're now prepared to explore this topic further.
+How about using different subsets of CIFAR-10 on each client? How about adding more
+clients?
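+
+As a starting point for the first question, here is a minimal sketch of one way to give
+each client a different CIFAR-10 subset. The ``client_id`` and ``num_clients`` arguments
+are hypothetical additions for illustration; they are not part of the example code above.
+
+.. code-block:: python
+
+    import torch
+    from torchvision import transforms
+    from torchvision.datasets import CIFAR10
+
+
+    def load_partition(client_id: int, num_clients: int):
+        """Load a disjoint CIFAR-10 training subset for one client (sketch)."""
+        transform = transforms.Compose(
+            [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+        )
+        trainset = CIFAR10("~/data/cifar-10", train=True, download=True, transform=transform)
+        # Split the training set into `num_clients` near-equal parts; the fixed seed
+        # makes every client compute the same split, so the parts stay disjoint.
+        lengths = [len(trainset) // num_clients] * num_clients
+        lengths[-1] += len(trainset) - sum(lengths)
+        generator = torch.Generator().manual_seed(42)
+        parts = torch.utils.data.random_split(trainset, lengths, generator=generator)
+        return torch.utils.data.DataLoader(parts[client_id], batch_size=32, shuffle=True)
+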
diff --git a/doc/source/example-pytorch-from-centralized-to-federated.rst b/doc/source/example-pytorch-from-centralized-to-federated.rst index 0c458a136a81..9629a7fed6e8 100644 --- a/doc/source/example-pytorch-from-centralized-to-federated.rst +++ b/doc/source/example-pytorch-from-centralized-to-federated.rst @@ -1,21 +1,25 @@ Example: PyTorch - From Centralized To Federated ================================================ -This tutorial will show you how to use Flower to build a federated version of an existing machine learning workload. -We are using PyTorch to train a Convolutional Neural Network on the CIFAR-10 dataset. -First, we introduce this machine learning task with a centralized training approach based on the `Deep Learning with PyTorch `_ tutorial. -Then, we build upon the centralized training code to run the training in a federated fashion. +This tutorial will show you how to use Flower to build a federated version of an +existing machine learning workload. We are using PyTorch to train a Convolutional Neural +Network on the CIFAR-10 dataset. First, we introduce this machine learning task with a +centralized training approach based on the `Deep Learning with PyTorch +`_ tutorial. Then, +we build upon the centralized training code to run the training in a federated fashion. Centralized Training -------------------- -We begin with a brief description of the centralized CNN training code. -If you want a more in-depth explanation of what's going on then have a look at the official `PyTorch tutorial `_. +We begin with a brief description of the centralized CNN training code. If you want a +more in-depth explanation of what's going on then have a look at the official `PyTorch +tutorial `_. -Let's create a new file called :code:`cifar.py` with all the components required for a traditional (centralized) training on CIFAR-10. -First, all required packages (such as :code:`torch` and :code:`torchvision`) need to be imported. -You can see that we do not import any package for federated learning. -You can keep all these imports as they are even when we add the federated learning components at a later point. +Let's create a new file called ``cifar.py`` with all the components required for a +traditional (centralized) training on CIFAR-10. First, all required packages (such as +``torch`` and ``torchvision``) need to be imported. You can see that we do not import +any package for federated learning. You can keep all these imports as they are even when +we add the federated learning components at a later point. .. code-block:: python @@ -29,7 +33,9 @@ You can keep all these imports as they are even when we add the federated learni from torch import Tensor from torchvision.datasets import CIFAR10 -As already mentioned we will use the CIFAR-10 dataset for this machine learning workload. The model architecture (a very simple Convolutional Neural Network) is defined in :code:`class Net()`. +As already mentioned we will use the CIFAR-10 dataset for this machine learning +workload. The model architecture (a very simple Convolutional Neural Network) is defined +in ``class Net()``. .. code-block:: python @@ -53,13 +59,17 @@ As already mentioned we will use the CIFAR-10 dataset for this machine learning x = self.fc3(x) return x -The :code:`load_data()` function loads the CIFAR-10 training and test sets. The :code:`transform` normalized the data after loading. +The ``load_data()`` function loads the CIFAR-10 training and test sets. The +``transform`` normalized the data after loading. .. 
code-block:: python DATA_ROOT = "~/data/cifar-10" - def load_data() -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader, Dict]: + + def load_data() -> ( + Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader, Dict] + ): """Load CIFAR-10 (training and test set).""" transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] @@ -68,12 +78,15 @@ The :code:`load_data()` function loads the CIFAR-10 training and test sets. The trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True) testset = CIFAR10(DATA_ROOT, train=False, download=True, transform=transform) testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False) - num_examples = {"trainset" : len(trainset), "testset" : len(testset)} + num_examples = {"trainset": len(trainset), "testset": len(testset)} return trainloader, testloader, num_examples -We now need to define the training (function :code:`train()`) which loops over the training set, measures the loss, backpropagates it, and then takes one optimizer step for each batch of training examples. +We now need to define the training (function ``train()``) which loops over the training +set, measures the loss, backpropagates it, and then takes one optimizer step for each +batch of training examples. -The evaluation of the model is defined in the function :code:`test()`. The function loops over all test samples and measures the loss of the model based on the test dataset. +The evaluation of the model is defined in the function ``test()``. The function loops +over all test samples and measures the loss of the model based on the test dataset. .. code-block:: python @@ -133,7 +146,8 @@ The evaluation of the model is defined in the function :code:`test()`. The funct accuracy = correct / total return loss, accuracy -Having defined the data loading, model architecture, training, and evaluation we can put everything together and train our CNN on CIFAR-10. +Having defined the data loading, model architecture, training, and evaluation we can put +everything together and train our CNN on CIFAR-10. .. code-block:: python @@ -143,7 +157,7 @@ Having defined the data loading, model architecture, training, and evaluation we print("Load data") trainloader, testloader, _ = load_data() print("Start training") - net=Net().to(DEVICE) + net = Net().to(DEVICE) train(net=net, trainloader=trainloader, epochs=2, device=DEVICE) print("Evaluate model") loss, accuracy = test(net=net, testloader=testloader, device=DEVICE) @@ -156,46 +170,57 @@ Having defined the data loading, model architecture, training, and evaluation we You can now run your machine learning workload: -.. code-block:: python +.. code-block:: bash python3 cifar.py -So far, this should all look fairly familiar if you've used PyTorch before. -Let's take the next step and use what we've built to create a simple federated learning system consisting of one server and two clients. +So far, this should all look fairly familiar if you've used PyTorch before. Let's take +the next step and use what we've built to create a simple federated learning system +consisting of one server and two clients. Federated Training ------------------ -The simple machine learning project discussed in the previous section trains the model on a single dataset (CIFAR-10), we call this centralized learning. -This concept of centralized learning, as shown in the previous section, is probably known to most of you, and many of you have used it previously. 
-Normally, if you'd want to run machine learning workloads in a federated fashion, then you'd have to change most of your code and set everything up from scratch. This can be a considerable effort.
+The simple machine learning project discussed in the previous section trains the model
+on a single dataset (CIFAR-10); we call this centralized learning. This concept of
+centralized learning, as shown in the previous section, is probably known to most of
+you, and many of you have used it previously. Normally, if you'd want to run machine
+learning workloads in a federated fashion, then you'd have to change most of your code
+and set everything up from scratch. This can be a considerable effort.

-However, with Flower you can evolve your pre-existing code into a federated learning setup without the need for a major rewrite.
+However, with Flower you can evolve your pre-existing code into a federated learning
+setup without the need for a major rewrite.

-The concept is easy to understand.
-We have to start a *server* and then use the code in :code:`cifar.py` for the *clients* that are connected to the *server*.
-The *server* sends model parameters to the clients. The *clients* run the training and update the parameters.
-The updated parameters are sent back to the *server* which averages all received parameter updates.
-This describes one round of the federated learning process and we repeat this for multiple rounds.
+The concept is easy to understand. We have to start a *server* and then use the code in
+``cifar.py`` for the *clients* that are connected to the *server*. The *server* sends
+model parameters to the clients. The *clients* run the training and update the
+parameters. The updated parameters are sent back to the *server*, which averages all
+received parameter updates. This describes one round of the federated learning process,
+and we repeat this for multiple rounds.

-Our example consists of one *server* and two *clients*. Let's set up :code:`server.py` first. The *server* needs to import the Flower package :code:`flwr`.
-Next, we use the :code:`start_server` function to start a server and tell it to perform three rounds of federated learning.
+Our example consists of one *server* and two *clients*. Let's set up ``server.py``
+first. The *server* needs to import the Flower package ``flwr``. Next, we use the
+``start_server`` function to start a server and tell it to perform three rounds of
+federated learning.

 .. code-block:: python

     import flwr as fl

     if __name__ == "__main__":
-        fl.server.start_server(server_address="0.0.0.0:8080", config=fl.server.ServerConfig(num_rounds=3))
+        fl.server.start_server(
+            server_address="0.0.0.0:8080", config=fl.server.ServerConfig(num_rounds=3)
+        )

 We can already start the *server*:

-.. code-block:: python
+.. code-block:: bash

     python3 server.py

-Finally, we will define our *client* logic in :code:`client.py` and build upon the previously defined centralized training in :code:`cifar.py`.
-Our *client* needs to import :code:`flwr`, but also :code:`torch` to update the parameters on our PyTorch model:
+Finally, we will define our *client* logic in ``client.py`` and build upon the
+previously defined centralized training in ``cifar.py``. Our *client* needs to import
+``flwr``, but also ``torch`` to update the parameters on our PyTorch model:

 ..
code-block:: python @@ -210,28 +235,38 @@ Our *client* needs to import :code:`flwr`, but also :code:`torch` to update the DEVICE: str = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -Implementing a Flower *client* basically means implementing a subclass of either :code:`flwr.client.Client` or :code:`flwr.client.NumPyClient`. -Our implementation will be based on :code:`flwr.client.NumPyClient` and we'll call it :code:`CifarClient`. -:code:`NumPyClient` is slightly easier to implement than :code:`Client` if you use a framework with good NumPy interoperability (like PyTorch or TensorFlow/Keras) because it avoids some of the boilerplate that would otherwise be necessary. -:code:`CifarClient` needs to implement four methods, two methods for getting/setting model parameters, one method for training the model, and one method for testing the model: - -#. :code:`set_parameters` - * set the model parameters on the local model that are received from the server - * loop over the list of model parameters received as NumPy :code:`ndarray`'s (think list of neural network layers) -#. :code:`get_parameters` - * get the model parameters and return them as a list of NumPy :code:`ndarray`'s (which is what :code:`flwr.client.NumPyClient` expects) -#. :code:`fit` - * update the parameters of the local model with the parameters received from the server - * train the model on the local training set - * get the updated local model weights and return them to the server -#. :code:`evaluate` - * update the parameters of the local model with the parameters received from the server - * evaluate the updated model on the local test set - * return the local loss and accuracy to the server - -The two :code:`NumPyClient` methods :code:`fit` and :code:`evaluate` make use of the functions :code:`train()` and :code:`test()` previously defined in :code:`cifar.py`. -So what we really do here is we tell Flower through our :code:`NumPyClient` subclass which of our already defined functions to call for training and evaluation. -We included type annotations to give you a better understanding of the data types that get passed around. +Implementing a Flower *client* basically means implementing a subclass of either +``flwr.client.Client`` or ``flwr.client.NumPyClient``. Our implementation will be based +on ``flwr.client.NumPyClient`` and we'll call it ``CifarClient``. ``NumPyClient`` is +slightly easier to implement than ``Client`` if you use a framework with good NumPy +interoperability (like PyTorch or TensorFlow/Keras) because it avoids some of the +boilerplate that would otherwise be necessary. ``CifarClient`` needs to implement four +methods, two methods for getting/setting model parameters, one method for training the +model, and one method for testing the model: + +1. ``set_parameters`` + - set the model parameters on the local model that are received from the server + - loop over the list of model parameters received as NumPy ``ndarray``'s (think + list of neural network layers) +2. ``get_parameters`` + - get the model parameters and return them as a list of NumPy ``ndarray``'s + (which is what ``flwr.client.NumPyClient`` expects) +3. ``fit`` + - update the parameters of the local model with the parameters received from the + server + - train the model on the local training set + - get the updated local model weights and return them to the server +4. 
``evaluate``
+   - update the parameters of the local model with the parameters received from the
+     server
+   - evaluate the updated model on the local test set
+   - return the local loss and accuracy to the server
+
+The two ``NumPyClient`` methods ``fit`` and ``evaluate`` make use of the functions
+``train()`` and ``test()`` previously defined in ``cifar.py``. So what we really do here
+is we tell Flower through our ``NumPyClient`` subclass which of our already defined
+functions to call for training and evaluation. We included type annotations to give you
+a better understanding of the data types that get passed around.

 .. code-block:: python

@@ -277,8 +312,10 @@ We included type annotations to give you a better understanding of the data type
             loss, accuracy = cifar.test(self.model, self.testloader, device=DEVICE)
             return float(loss), self.num_examples["testset"], {"accuracy": float(accuracy)}

-All that's left to do it to define a function that loads both model and data, creates a :code:`CifarClient`, and starts this client.
-You load your data and model by using :code:`cifar.py`. Start :code:`CifarClient` with the function :code:`fl.client.start_client()` by pointing it at the same IP address we used in :code:`server.py`:
+All that's left to do is to define a function that loads both model and data, creates a
+``CifarClient``, and starts this client. You load your data and model by using
+``cifar.py``. Start ``CifarClient`` with the function ``fl.client.start_client()`` by
+pointing it at the same IP address we used in ``server.py``:

 .. code-block:: python

@@ -300,15 +337,20 @@ You load your data and model by using :code:`cifar.py`. Start :code:`CifarClient

 And that's it. You can now open two additional terminal windows and run

-.. code-block:: python
+.. code-block:: bash

     python3 client.py

-in each window (make sure that the server is running before you do so) and see your (previously centralized) PyTorch project run federated learning across two clients. Congratulations!
+in each window (make sure that the server is running before you do so) and see your
+(previously centralized) PyTorch project run federated learning across two clients.
+Congratulations!

 Next Steps
 ----------

-The full source code for this example: `PyTorch: From Centralized To Federated (Code) `_.
-Our example is, of course, somewhat over-simplified because both clients load the exact same dataset, which isn't realistic.
-You're now prepared to explore this topic further. How about using different subsets of CIFAR-10 on each client? How about adding more clients?
+The full source code for this example: `PyTorch: From Centralized To Federated (Code)
+`_.
+Our example is, of course, somewhat over-simplified because both clients load the exact
+same dataset, which isn't realistic. You're now prepared to explore this topic further.
+How about using different subsets of CIFAR-10 on each client? How about adding more
+clients?
diff --git a/doc/source/explanation-differential-privacy.rst b/doc/source/explanation-differential-privacy.rst
index e488f5ccbd57..06e9dbdedd39 100644
--- a/doc/source/explanation-differential-privacy.rst
+++ b/doc/source/explanation-differential-privacy.rst
@@ -1,133 +1,171 @@
 Differential Privacy
 ====================

-The information in datasets like healthcare, financial transactions, user preferences, etc., is valuable and has the potential for scientific breakthroughs and provides important business insights.
-However, such data is also sensitive and there is a risk of compromising individual privacy. -Traditional methods like anonymization alone would not work because of attacks like Re-identification and Data Linkage. -That's where differential privacy comes in. It provides the possibility of analyzing data while ensuring the privacy of individuals. +The information in datasets like healthcare, financial transactions, user preferences, +etc., is valuable and has the potential for scientific breakthroughs and provides +important business insights. However, such data is also sensitive and there is a risk of +compromising individual privacy. +Traditional methods like anonymization alone would not work because of attacks like +Re-identification and Data Linkage. That's where differential privacy comes in. It +provides the possibility of analyzing data while ensuring the privacy of individuals. Differential Privacy -------------------- -Imagine two datasets that are identical except for a single record (for instance, Alice's data). -Differential Privacy (DP) guarantees that any analysis (M), like calculating the average income, will produce nearly identical results for both datasets (O and O' would be similar). -This preserves group patterns while obscuring individual details, ensuring the individual's information remains hidden in the crowd. -.. image:: ./_static/DP/dp-intro.png - :align: center - :width: 400 - :alt: DP Intro +Imagine two datasets that are identical except for a single record (for instance, +Alice's data). Differential Privacy (DP) guarantees that any analysis (M), like +calculating the average income, will produce nearly identical results for both datasets +(O and O' would be similar). This preserves group patterns while obscuring individual +details, ensuring the individual's information remains hidden in the crowd. +.. image:: ./_static/DP/dp-intro.png + :align: center + :width: 400 + :alt: DP Intro -One of the most commonly used mechanisms to achieve DP is adding enough noise to the output of the analysis to mask the contribution of each individual in the data while preserving the overall accuracy of the analysis. +One of the most commonly used mechanisms to achieve DP is adding enough noise to the +output of the analysis to mask the contribution of each individual in the data while +preserving the overall accuracy of the analysis. Formal Definition ~~~~~~~~~~~~~~~~~ -Differential Privacy (DP) provides statistical guarantees against the information an adversary can infer through the output of a randomized algorithm. -It provides an unconditional upper bound on the influence of a single individual on the output of the algorithm by adding noise [1]. -A randomized mechanism -M provides (:math:`\epsilon`, :math:`\delta`)-differential privacy if for any two neighboring databases, D :sub:`1` and D :sub:`2`, that differ in only a single record, -and for all possible outputs S ⊆ Range(A): - -.. math:: - \small - P[M(D_{1} \in A)] \leq e^{\epsilon} P[M(D_{2} \in A)] + \delta +Differential Privacy (DP) provides statistical guarantees against the information an +adversary can infer through the output of a randomized algorithm. It provides an +unconditional upper bound on the influence of a single individual on the output of the +algorithm by adding noise [1]. 
A randomized mechanism M provides (:math:`\epsilon`,
+:math:`\delta`)-differential privacy if for any two neighboring databases, D :sub:`1`
+and D :sub:`2`, that differ in only a single record, and for all possible sets of
+outputs S ⊆ Range(M):

+.. math::

+    \small
+    P[M(D_{1}) \in S] \leq e^{\epsilon} P[M(D_{2}) \in S] + \delta

-The :math:`\epsilon` parameter, also known as the privacy budget, is a metric of privacy loss.
-It also controls the privacy-utility trade-off; lower :math:`\epsilon` values indicate higher levels of privacy but are likely to reduce utility as well.
-The :math:`\delta` parameter accounts for a small probability on which the upper bound :math:`\epsilon` does not hold.
-The amount of noise needed to achieve differential privacy is proportional to the sensitivity of the output, which measures the maximum change in the output due to the inclusion or removal of a single record.
+The :math:`\epsilon` parameter, also known as the privacy budget, is a metric of privacy
+loss. It also controls the privacy-utility trade-off; lower :math:`\epsilon` values
+indicate higher levels of privacy but are likely to reduce utility as well. The
+:math:`\delta` parameter accounts for a small probability on which the upper bound
+:math:`\epsilon` does not hold. The amount of noise needed to achieve differential
+privacy is proportional to the sensitivity of the output, which measures the maximum
+change in the output due to the inclusion or removal of a single record.

 Differential Privacy in Machine Learning
 ----------------------------------------
+
 DP can be utilized in machine learning to preserve the privacy of the training data.
-Differentially private machine learning algorithms are designed in a way to prevent the algorithm to learn any specific information about any individual data points and subsequently prevent the model from revealing sensitive information.
-Depending on the stage at which noise is introduced, various methods exist for applying DP to machine learning algorithms.
-One approach involves adding noise to the training data (either to the features or labels), while another method entails injecting noise into the gradients of the loss function during model training.
-Additionally, such noise can be incorporated into the model's output.
+Differentially private machine learning algorithms are designed in a way to prevent the
+algorithm from learning any specific information about any individual data points and
+subsequently prevent the model from revealing sensitive information. Depending on the
+stage at which noise is introduced, various methods exist for applying DP to machine
+learning algorithms. One approach involves adding noise to the training data (either to
+the features or labels), while another method entails injecting noise into the gradients
+of the loss function during model training. Additionally, such noise can be incorporated
+into the model's output.

 Differential Privacy in Federated Learning
 ------------------------------------------
-Federated learning is a data minimization approach that allows multiple parties to collaboratively train a model without sharing their raw data.
-However, federated learning also introduces new privacy challenges. The model updates between parties and the central server can leak information about the local data.
-These leaks can be exploited by attacks such as membership inference and property inference attacks, or model inversion attacks.
-DP can play a crucial role in federated learning to provide privacy for the clients' data.
+Federated learning is a data minimization approach that allows multiple parties to +collaboratively train a model without sharing their raw data. However, federated +learning also introduces new privacy challenges. The model updates between parties and +the central server can leak information about the local data. These leaks can be +exploited by attacks such as membership inference and property inference attacks, or +model inversion attacks. -Depending on the granularity of privacy provision or the location of noise addition, different forms of DP exist in federated learning. -In this explainer, we focus on two approaches of DP utilization in federated learning based on where the noise is added: at the server (also known as the center) or at the client (also known as the local). +DP can play a crucial role in federated learning to provide privacy for the clients' +data. -- **Central Differential Privacy**: DP is applied by the server and the goal is to prevent the aggregated model from leaking information about each client's data. +Depending on the granularity of privacy provision or the location of noise addition, +different forms of DP exist in federated learning. In this explainer, we focus on two +approaches of DP utilization in federated learning based on where the noise is added: at +the server (also known as the center) or at the client (also known as the local). -- **Local Differential Privacy**: DP is applied on the client side before sending any information to the server and the goal is to prevent the updates that are sent to the server from leaking any information about the client's data. +- **Central Differential Privacy**: DP is applied by the server and the goal is to + prevent the aggregated model from leaking information about each client's data. +- **Local Differential Privacy**: DP is applied on the client side before sending any + information to the server and the goal is to prevent the updates that are sent to the + server from leaking any information about the client's data. Central Differential Privacy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In this approach, which is also known as user-level DP, the central server is responsible for adding noise to the globally aggregated parameters. It should be noted that trust in the server is required. + +In this approach, which is also known as user-level DP, the central server is +responsible for adding noise to the globally aggregated parameters. It should be noted +that trust in the server is required. .. image:: ./_static/DP/CDP.png - :align: center - :width: 400 - :alt: Central Differential Privacy - -While there are various ways to implement central DP in federated learning, we concentrate on the algorithms proposed by [2] and [3]. -The overall approach is to clip the model updates sent by the clients and add some amount of noise to the aggregated model. -In each iteration, a random set of clients is chosen with a specific probability for training. -Each client performs local training on its own data. -The update of each client is then clipped by some value `S` (sensitivity `S`). -This would limit the impact of any individual client which is crucial for privacy and often beneficial for robustness. -A common approach to achieve this is by restricting the `L2` norm of the clients' model updates, ensuring that larger updates are scaled down to fit within the norm `S`. 
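+
+To make the clipping step concrete, the following is a minimal NumPy sketch of fixed
+`L2`-norm clipping. It assumes ``update`` is the list of parameter arrays a client would
+send; it is illustrative only, not Flower's internal implementation.
+
+.. code-block:: python
+
+    import numpy as np
+
+
+    def clip_update(update, clipping_norm):
+        """Scale a model update so its global L2 norm is at most `clipping_norm`."""
+        flat = np.concatenate([layer.ravel() for layer in update])
+        norm = np.linalg.norm(flat)
+        # Updates already within the bound are left untouched (scale factor 1.0).
+        scale = min(1.0, clipping_norm / (norm + 1e-12))
+        return [layer * scale for layer in update]
+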
+ :align: center + :width: 400 + :alt: Central Differential Privacy + +While there are various ways to implement central DP in federated learning, we +concentrate on the algorithms proposed by [2] and [3]. The overall approach is to clip +the model updates sent by the clients and add some amount of noise to the aggregated +model. In each iteration, a random set of clients is chosen with a specific probability +for training. Each client performs local training on its own data. The update of each +client is then clipped by some value `S` (sensitivity `S`). This would limit the impact +of any individual client which is crucial for privacy and often beneficial for +robustness. A common approach to achieve this is by restricting the `L2` norm of the +clients' model updates, ensuring that larger updates are scaled down to fit within the +norm `S`. .. image:: ./_static/DP/clipping.png - :align: center - :width: 300 - :alt: clipping + :align: center + :width: 300 + :alt: clipping -Afterwards, the Gaussian mechanism is used to add noise in order to distort the sum of all clients' updates. -The amount of noise is scaled to the sensitivity value to obtain a privacy guarantee. -The Gaussian mechanism is used with a noise sampled from `N (0, σ²)` where `σ = ( noise_scale * S ) / (number of sampled clients)`. +Afterwards, the Gaussian mechanism is used to add noise in order to distort the sum of +all clients' updates. The amount of noise is scaled to the sensitivity value to obtain a +privacy guarantee. The Gaussian mechanism is used with a noise sampled from `N (0, σ²)` +where `σ = ( noise_scale * S ) / (number of sampled clients)`. Clipping -^^^^^^^^ - -There are two forms of clipping commonly used in Central DP: Fixed Clipping and Adaptive Clipping. +++++++++ -- **Fixed Clipping** : A predefined fix threshold is set for the magnitude of clients' updates. Any update exceeding this threshold is clipped back to the threshold value. +There are two forms of clipping commonly used in Central DP: Fixed Clipping and Adaptive +Clipping. -- **Adaptive Clipping** : The clipping threshold dynamically adjusts based on the observed update distribution [4]. It means that the clipping value is tuned during the rounds with respect to the quantile of the update norm distribution. +- **Fixed Clipping** : A predefined fix threshold is set for the magnitude of clients' + updates. Any update exceeding this threshold is clipped back to the threshold value. +- **Adaptive Clipping** : The clipping threshold dynamically adjusts based on the + observed update distribution [4]. It means that the clipping value is tuned during the + rounds with respect to the quantile of the update norm distribution. -The choice between fixed and adaptive clipping depends on various factors such as privacy requirements, data distribution, model complexity, and others. +The choice between fixed and adaptive clipping depends on various factors such as +privacy requirements, data distribution, model complexity, and others. Local Differential Privacy ~~~~~~~~~~~~~~~~~~~~~~~~~~ -In this approach, each client is responsible for performing DP. -Local DP avoids the need for a fully trusted aggregator, but it should be noted that local DP leads to a decrease in accuracy but better privacy in comparison to central DP. +In this approach, each client is responsible for performing DP. Local DP avoids the need +for a fully trusted aggregator, but it should be noted that local DP leads to a decrease +in accuracy but better privacy in comparison to central DP. 
.. image:: ./_static/DP/LDP.png - :align: center - :width: 400 - :alt: Local Differential Privacy - + :align: center + :width: 400 + :alt: Local Differential Privacy In this explainer, we focus on two forms of achieving Local DP: -- Each client adds noise to the local updates before sending them to the server. To achieve (:math:`\epsilon`, :math:`\delta`)-DP, considering the sensitivity of the local model to be ∆, Gaussian noise is applied with a noise scale of σ where: +- Each client adds noise to the local updates before sending them to the server. To + achieve (:math:`\epsilon`, :math:`\delta`)-DP, considering the sensitivity of the + local model to be ∆, Gaussian noise is applied with a noise scale of σ where: .. math:: + \small \frac{∆ \times \sqrt{2 \times \log\left(\frac{1.25}{\delta}\right)}}{\epsilon} - -- Each client adds noise to the gradients of the model during the local training (DP-SGD). More specifically, in this approach, gradients are clipped and an amount of calibrated noise is injected into the gradients. - +- Each client adds noise to the gradients of the model during the local training + (DP-SGD). More specifically, in this approach, gradients are clipped and an amount of + calibrated noise is injected into the gradients. Please note that these two approaches are providing privacy at different levels. - **References:** [1] Dwork et al. The Algorithmic Foundations of Differential Privacy. diff --git a/doc/source/explanation-federated-evaluation.rst b/doc/source/explanation-federated-evaluation.rst index bcdca9bae700..c56a5d48b2f6 100644 --- a/doc/source/explanation-federated-evaluation.rst +++ b/doc/source/explanation-federated-evaluation.rst @@ -1,8 +1,8 @@ Federated evaluation ==================== -There are two main approaches to evaluating models in federated learning systems: centralized (or server-side) evaluation and federated (or client-side) evaluation. - +There are two main approaches to evaluating models in federated learning systems: +centralized (or server-side) evaluation and federated (or client-side) evaluation. Centralized Evaluation ---------------------- @@ -10,15 +10,17 @@ Centralized Evaluation Built-In Strategies ~~~~~~~~~~~~~~~~~~~ -All built-in strategies support centralized evaluation by providing an evaluation function during initialization. -An evaluation function is any function that can take the current global model parameters as input and return evaluation results: +All built-in strategies support centralized evaluation by providing an evaluation +function during initialization. An evaluation function is any function that can take the +current global model parameters as input and return evaluation results: .. code-block:: python - + from flwr.common import NDArrays, Scalar - + from typing import Dict, Optional, Tuple + def get_evaluate_fn(model): """Return an evaluation function for server-side evaluation.""" @@ -38,6 +40,7 @@ An evaluation function is any function that can take the current global model pa return evaluate + # Load and compile model for server-side parameter evaluation model = tf.keras.applications.EfficientNetB0( input_shape=(32, 32, 3), weights=None, classes=10 @@ -47,7 +50,7 @@ An evaluation function is any function that can take the current global model pa # Create strategy strategy = fl.server.strategy.FedAvg( - # ... other FedAvg arguments + # ... 
other FedAvg arguments evaluate_fn=get_evaluate_fn(model), ) @@ -57,9 +60,10 @@ An evaluation function is any function that can take the current global model pa Custom Strategies ~~~~~~~~~~~~~~~~~ -The :code:`Strategy` abstraction provides a method called :code:`evaluate` that can directly be used to evaluate the current global model parameters. -The current server implementation calls :code:`evaluate` after parameter aggregation and before federated evaluation (see next paragraph). - +The ``Strategy`` abstraction provides a method called ``evaluate`` that can directly be +used to evaluate the current global model parameters. The current server implementation +calls ``evaluate`` after parameter aggregation and before federated evaluation (see next +paragraph). Federated Evaluation -------------------- @@ -67,7 +71,8 @@ Federated Evaluation Implementing Federated Evaluation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Client-side evaluation happens in the :code:`Client.evaluate` method and can be configured from the server side. +Client-side evaluation happens in the ``Client.evaluate`` method and can be configured +from the server side. .. code-block:: python @@ -79,9 +84,11 @@ Client-side evaluation happens in the :code:`Client.evaluate` method and can be def get_parameters(self, config): # ... + pass def fit(self, parameters, config): # ... + pass def evaluate(self, parameters, config): """Evaluate parameters on the locally held test set.""" @@ -100,12 +107,27 @@ Client-side evaluation happens in the :code:`Client.evaluate` method and can be Configuring Federated Evaluation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Federated evaluation can be configured from the server side. Built-in strategies support the following arguments: - -- :code:`fraction_evaluate`: a :code:`float` defining the fraction of clients that will be selected for evaluation. If :code:`fraction_evaluate` is set to :code:`0.1` and :code:`100` clients are connected to the server, then :code:`10` will be randomly selected for evaluation. If :code:`fraction_evaluate` is set to :code:`0.0`, federated evaluation will be disabled. -- :code:`min_evaluate_clients`: an :code:`int`: the minimum number of clients to be selected for evaluation. If :code:`fraction_evaluate` is set to :code:`0.1`, :code:`min_evaluate_clients` is set to 20, and :code:`100` clients are connected to the server, then :code:`20` clients will be selected for evaluation. -- :code:`min_available_clients`: an :code:`int` that defines the minimum number of clients which need to be connected to the server before a round of federated evaluation can start. If fewer than :code:`min_available_clients` are connected to the server, the server will wait until more clients are connected before it continues to sample clients for evaluation. -- :code:`on_evaluate_config_fn`: a function that returns a configuration dictionary which will be sent to the selected clients. The function will be called during each round and provides a convenient way to customize client-side evaluation from the server side, for example, to configure the number of validation steps performed. +Federated evaluation can be configured from the server side. Built-in strategies support +the following arguments: + +- ``fraction_evaluate``: a ``float`` defining the fraction of clients that will be + selected for evaluation. If ``fraction_evaluate`` is set to ``0.1`` and ``100`` + clients are connected to the server, then ``10`` will be randomly selected for + evaluation. 
If ``fraction_evaluate`` is set to ``0.0``, federated evaluation will be + disabled. +- ``min_evaluate_clients``: an ``int``: the minimum number of clients to be selected for + evaluation. If ``fraction_evaluate`` is set to ``0.1``, ``min_evaluate_clients`` is + set to 20, and ``100`` clients are connected to the server, then ``20`` clients will + be selected for evaluation. +- ``min_available_clients``: an ``int`` that defines the minimum number of clients which + need to be connected to the server before a round of federated evaluation can start. + If fewer than ``min_available_clients`` are connected to the server, the server will + wait until more clients are connected before it continues to sample clients for + evaluation. +- ``on_evaluate_config_fn``: a function that returns a configuration dictionary which + will be sent to the selected clients. The function will be called during each round + and provides a convenient way to customize client-side evaluation from the server + side, for example, to configure the number of validation steps performed. .. code-block:: python @@ -118,6 +140,7 @@ Federated evaluation can be configured from the server side. Built-in strategies val_steps = 5 if server_round < 4 else 10 return {"val_steps": val_steps} + # Create strategy strategy = fl.server.strategy.FedAvg( # ... other FedAvg arguments @@ -130,11 +153,11 @@ Federated evaluation can be configured from the server side. Built-in strategies # Start Flower server for four rounds of federated learning fl.server.start_server(server_address="[::]:8080", strategy=strategy) - Evaluating Local Model Updates During Training ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Model parameters can also be evaluated during training. :code:`Client.fit` can return arbitrary evaluation results as a dictionary: +Model parameters can also be evaluated during training. ``Client.fit`` can return +arbitrary evaluation results as a dictionary: .. code-block:: python @@ -146,6 +169,7 @@ Model parameters can also be evaluated during training. :code:`Client.fit` can r def get_parameters(self, config): # ... + pass def fit(self, parameters, config): """Train parameters on the locally held training set.""" @@ -171,9 +195,12 @@ Model parameters can also be evaluated during training. :code:`Client.fit` can r def evaluate(self, parameters, config): # ... - + pass Full Code Example ----------------- -For a full code example that uses both centralized and federated evaluation, see the *Advanced TensorFlow Example* (the same approach can be applied to workloads implemented in any other framework): https://github.com/adap/flower/tree/main/examples/advanced-tensorflow +For a full code example that uses both centralized and federated evaluation, see the +*Advanced TensorFlow Example* (the same approach can be applied to workloads implemented +in any other framework): +https://github.com/adap/flower/tree/main/examples/advanced-tensorflow diff --git a/doc/source/explanation-flower-architecture.rst b/doc/source/explanation-flower-architecture.rst index 22691d6091ac..e82da56dcefa 100644 --- a/doc/source/explanation-flower-architecture.rst +++ b/doc/source/explanation-flower-architecture.rst @@ -1,180 +1,158 @@ -##################### - Flower Architecture -##################### +Flower Architecture +=================== -This page explains the architecture of deployed Flower federated -learning system. +This page explains the architecture of deployed Flower federated learning system. 
-In federated learning (FL), there is typically one server and a number
-of clients that are connected to the server. This is often called a
-federation.
+In federated learning (FL), there is typically one server and a number of clients that
+are connected to the server. This is often called a federation.

-The role of the server is to coordinate the training process. The role
-of each client is to receive tasks from the server, execute those tasks
-and return the results back to the server.
+The role of the server is to coordinate the training process. The role of each client is
+to receive tasks from the server, execute those tasks and return the results back to the
+server.

 This is sometimes called a hub-and-spoke topology:

 .. figure:: ./_static/flower-architecture-hub-and-spoke.svg
-   :align: center
-   :width: 600
-   :alt: Hub-and-spoke topology in federated learning
-   :class: no-scaled-link
+    :align: center
+    :width: 600
+    :alt: Hub-and-spoke topology in federated learning
+    :class: no-scaled-link

-   Hub-and-spoke topology in federated learning (one server, multiple clients).
+    Hub-and-spoke topology in federated learning (one server, multiple clients).

-In a real-world deployment, we typically want to run different projects
-on such a federation. Each project could use different hyperparameters,
-different model architectures, different aggregation strategies, or even
-different machine learning frameworks like PyTorch and TensorFlow.
+In a real-world deployment, we typically want to run different projects on such a
+federation. Each project could use different hyperparameters, different model
+architectures, different aggregation strategies, or even different machine learning
+frameworks like PyTorch and TensorFlow.

-This is why, in Flower, both the server side and the client side are
-split into two parts. One part is long-lived and responsible for
-communicating across the network, the other part is short-lived and
-executes task-specific code.
+This is why, in Flower, both the server side and the client side are split into two
+parts. One part is long-lived and responsible for communicating across the network, the
+other part is short-lived and executes task-specific code.

 A Flower `server` consists of **SuperLink** and ``ServerApp``:

-- **SuperLink**: a long-running process that forwards task instructions
-  to clients (SuperNodes) and receives task results back.
-
-- ``ServerApp``: a short-lived process with project-spcific code that
-  customizes all server-side aspects of federated learning systems
-  (client selection, client configuration, result aggregation). This is
-  what AI researchers and AI engineers write when they build Flower
-  apps.
+- **SuperLink**: a long-running process that forwards task instructions to clients
+  (SuperNodes) and receives task results back.
+- ``ServerApp``: a short-lived process with project-specific code that customizes all
+  server-side aspects of federated learning systems (client selection, client
+  configuration, result aggregation). This is what AI researchers and AI engineers write
+  when they build Flower apps.

 A Flower `client` consists of **SuperNode** and ``ClientApp``:

-- **SuperNode**: a long-running process that connects to the SuperLink,
-  asks for tasks, executes tasks (for example, "train this model on
-  your local data") and returns task results back to the SuperLink.
- -- ``ClientApp``: a short-lived process with project-specific code that - customizes all client-side aspects of federated learning systems - (local model training and evaluation, pre- and post-processing). This - is what AI researchers and AI engineers write when they build Flower - apps. +- **SuperNode**: a long-running process that connects to the SuperLink, asks for tasks, + executes tasks (for example, "train this model on your local data") and returns task + results back to the SuperLink. +- ``ClientApp``: a short-lived process with project-specific code that customizes all + client-side aspects of federated learning systems (local model training and + evaluation, pre- and post-processing). This is what AI researchers and AI engineers + write when they build Flower apps. -Why SuperNode and SuperLink? Well, in federated learning, the clients -are the actual stars of the show. They hold the training data and they -run the actual training. This is why Flower decided to name them -**SuperNode**. The **SuperLink** is then responsible for acting as the -`missing link` between all those SuperNodes. +Why SuperNode and SuperLink? Well, in federated learning, the clients are the actual +stars of the show. They hold the training data and they run the actual training. This is +why Flower decided to name them **SuperNode**. The **SuperLink** is then responsible for +acting as the `missing link` between all those SuperNodes. .. figure:: ./_static/flower-architecture-basic-architecture.svg - :align: center - :width: 600 - :alt: Basic Flower architecture - :class: no-scaled-link + :align: center + :width: 600 + :alt: Basic Flower architecture + :class: no-scaled-link - The basic Flower architecture for federated learning. + The basic Flower architecture for federated learning. -In a Flower app project, users will typically develop the ``ServerApp`` -and the ``ClientApp``. All the network communication between `server` -and `clients` is taken care of by the SuperLink and SuperNodes. +In a Flower app project, users will typically develop the ``ServerApp`` and the +``ClientApp``. All the network communication between `server` and `clients` is taken +care of by the SuperLink and SuperNodes. .. tip:: - For more details, please refer to the |serverapp_link|_ and - |clientapp_link|_ documentation. + For more details, please refer to the |serverapp_link|_ and |clientapp_link|_ + documentation. -With *multi-run*, multiple ``ServerApp``\s and ``ClientApp``\s are now -capable of running on the same federation consisting of a single -long-running SuperLink and multiple long-running SuperNodes. This is -sometimes referred to as `multi-tenancy` or `multi-job`. +With *multi-run*, multiple ``ServerApp``\s and ``ClientApp``\s are now capable of +running on the same federation consisting of a single long-running SuperLink and +multiple long-running SuperNodes. This is sometimes referred to as `multi-tenancy` or +`multi-job`. -As shown in the figure below, two projects, each consisting of a -``ServerApp`` and a ``ClientApp``, could share the same SuperLink and -SuperNodes. +As shown in the figure below, two projects, each consisting of a ``ServerApp`` and a +``ClientApp``, could share the same SuperLink and SuperNodes. .. 
figure:: ./_static/flower-architecture-multi-run.svg - :align: center - :width: 600 - :alt: Multi-tenancy federated learning architecture - :class: no-scaled-link + :align: center + :width: 600 + :alt: Multi-tenancy federated learning architecture + :class: no-scaled-link - Multi-tenancy federated learning architecture with Flower + Multi-tenancy federated learning architecture with Flower -To illustrate how multi-run works, consider one federated learning -training run where a ``ServerApp`` and a ``ClientApp`` are participating -in ``[run 1]``. Note that a SuperNode will only run a ``ClientApp`` if -it is selected to participate in the training run. +To illustrate how multi-run works, consider one federated learning training run where a +``ServerApp`` and a ``ClientApp`` are participating in ``[run 1]``. Note that a +SuperNode will only run a ``ClientApp`` if it is selected to participate in the training +run. -In ``[run 1]`` below, all the SuperNodes are selected and therefore run -their corresponding ``ClientApp``\s: +In ``[run 1]`` below, all the SuperNodes are selected and therefore run their +corresponding ``ClientApp``\s: .. figure:: ./_static/flower-architecture-multi-run-1.svg - :align: center - :width: 600 - :alt: Multi-tenancy federated learning architecture - Run 1 - :class: no-scaled-link + :align: center + :width: 600 + :alt: Multi-tenancy federated learning architecture - Run 1 + :class: no-scaled-link - Run 1 in a multi-run federated learning architecture with Flower. - All SuperNodes participate in the training round. + Run 1 in a multi-run federated learning architecture with Flower. All SuperNodes + participate in the training round. -However, in ``[run 2]``, only the first and third SuperNodes are -selected to participate in the training: +However, in ``[run 2]``, only the first and third SuperNodes are selected to participate +in the training: .. figure:: ./_static/flower-architecture-multi-run-2.svg - :align: center - :width: 600 - :alt: Multi-tenancy federated learning architecture - Run 2 - :class: no-scaled-link - - Run 2 in a multi-run federated learning architecture with Flower. - Only the first and third SuperNodes are selected to participate in the - training round. - -Therefore, with Flower multi-run, different projects (each consisting of -a ``ServerApp`` and ``ClientApp``) can run on different sets of clients. - -To help you start and manage all of the concurrently executing training -runs, Flower offers one additional long-running server-side service -called **SuperExec**. When you type ``flwr run`` to start a new training -run, the ``flwr`` CLI bundles your local project (mainly your -``ServerApp`` and ``ClientApp``) and sends it to the **SuperExec**. The -**SuperExec** will then take care of starting and managing your -``ServerApp``, which in turn selects SuperNodes to execute your -``ClientApp``. - -This architecture allows many users to (concurrently) run their projects -on the same federation, simply by typing ``flwr run`` on their local -developer machine. + :align: center + :width: 600 + :alt: Multi-tenancy federated learning architecture - Run 2 + :class: no-scaled-link + + Run 2 in a multi-run federated learning architecture with Flower. Only the first and + third SuperNodes are selected to participate in the training round. + +Therefore, with Flower multi-run, different projects (each consisting of a ``ServerApp`` +and ``ClientApp``) can run on different sets of clients. 
+ +To help you start and manage all of the concurrently executing training runs, Flower +offers one additional long-running server-side service called **SuperExec**. When you +type ``flwr run`` to start a new training run, the ``flwr`` CLI bundles your local +project (mainly your ``ServerApp`` and ``ClientApp``) and sends it to the **SuperExec**. +The **SuperExec** will then take care of starting and managing your ``ServerApp``, which +in turn selects SuperNodes to execute your ``ClientApp``. + +This architecture allows many users to (concurrently) run their projects on the same +federation, simply by typing ``flwr run`` on their local developer machine. .. figure:: ./_static/flower-architecture-deployment-engine.svg - :align: center - :width: 800 - :alt: Flower Deployment Engine with SuperExec - :class: no-scaled-link + :align: center + :width: 800 + :alt: Flower Deployment Engine with SuperExec + :class: no-scaled-link - The SuperExec service for managing concurrent training runs in - Flower. + The SuperExec service for managing concurrent training runs in Flower. .. note:: - This explanation covers the Flower Deployment Engine. An explanation - covering the Flower Simulation Engine will follow. + This explanation covers the Flower Deployment Engine. An explanation covering the + Flower Simulation Engine will follow. .. important:: - As we continue to enhance Flower at a rapid pace, we'll periodically - update this explainer document. Feel free to share any feedback with - us. - -.. |clientapp_link| replace:: + As we continue to enhance Flower at a rapid pace, we'll periodically update this + explainer document. Feel free to share any feedback with us. - ``ClientApp`` +.. |clientapp_link| replace:: ``ClientApp`` -.. |serverapp_link| replace:: - - ``ServerApp`` +.. |serverapp_link| replace:: ``ServerApp`` .. _clientapp_link: ref-api/flwr.client.ClientApp.html .. _serverapp_link: ref-api/flwr.server.ServerApp.html -.. title:: Flower federated learning architecture - .. meta:: - :description: Explore the federated learning architecture of the Flower framework, featuring multi-run, concurrent execution, and scalable, secure machine learning while preserving data privacy. + :description: Explore the federated learning architecture of the Flower framework, featuring multi-run, concurrent execution, and scalable, secure machine learning while preserving data privacy. diff --git a/doc/source/how-to-aggregate-evaluation-results.rst b/doc/source/how-to-aggregate-evaluation-results.rst index fa4ba88b8ff0..be6e20068c88 100644 --- a/doc/source/how-to-aggregate-evaluation-results.rst +++ b/doc/source/how-to-aggregate-evaluation-results.rst @@ -1,14 +1,15 @@ Aggregate evaluation results ============================ -The Flower server does not prescribe a way to aggregate evaluation results, but it enables the user to fully customize result aggregation. - +The Flower server does not prescribe a way to aggregate evaluation results, but it +enables the user to fully customize result aggregation. Aggregate Custom Evaluation Results ----------------------------------- -The same :code:`Strategy`-customization approach can be used to aggregate custom evaluation results coming from individual clients. -Clients can return custom metrics to the server by returning a dictionary: +The same ``Strategy``-customization approach can be used to aggregate custom evaluation +results coming from individual clients. Clients can return custom metrics to the server +by returning a dictionary: .. 
code-block:: python

@@ -16,9 +17,11 @@ Clients can return custom metrics to the server by returning a dictionary:

         def get_parameters(self, config):
             # ...
+            pass

         def fit(self, parameters, config):
             # ...
+            pass

         def evaluate(self, parameters, config):
             """Evaluate parameters on the locally held test set."""
@@ -33,7 +36,8 @@ Clients can return custom metrics to the server by returning a dictionary:
             num_examples_test = len(self.x_test)
             return loss, num_examples_test, {"accuracy": accuracy}

-The server can then use a customized strategy to aggregate the metrics provided in these dictionaries:
+The server can then use a customized strategy to aggregate the metrics provided in these
+dictionaries:

 .. code-block:: python

@@ -50,7 +54,9 @@ The server can then use a customized strategy to aggregate the metrics provided
                 return None, {}

             # Call aggregate_evaluate from base class (FedAvg) to aggregate loss and metrics
-            aggregated_loss, aggregated_metrics = super().aggregate_evaluate(server_round, results, failures)
+            aggregated_loss, aggregated_metrics = super().aggregate_evaluate(
+                server_round, results, failures
+            )

             # Weigh accuracy of each client by number of examples used
             accuracies = [r.metrics["accuracy"] * r.num_examples for _, r in results]
@@ -58,11 +64,14 @@ The server can then use a customized strategy to aggregate the metrics provided

             # Aggregate and print custom metric
             aggregated_accuracy = sum(accuracies) / sum(examples)
-            print(f"Round {server_round} accuracy aggregated from client results: {aggregated_accuracy}")
+            print(
+                f"Round {server_round} accuracy aggregated from client results: {aggregated_accuracy}"
+            )

             # Return aggregated loss and metrics (i.e., aggregated accuracy)
             return aggregated_loss, {"accuracy": aggregated_accuracy}

+
     # Create strategy and run server
     strategy = AggregateCustomMetricStrategy(
         # (same arguments as FedAvg here)
diff --git a/doc/source/how-to-authenticate-supernodes.rst b/doc/source/how-to-authenticate-supernodes.rst
index 9b001531ee33..a2dd499dbc10 100644
--- a/doc/source/how-to-authenticate-supernodes.rst
+++ b/doc/source/how-to-authenticate-supernodes.rst
@@ -1,29 +1,38 @@
 Authenticate SuperNodes
 =======================

-Flower has built-in support for authenticated SuperNodes that you can use to verify the identities of each SuperNode connecting to a SuperLink.
-Flower node authentication works similar to how GitHub SSH authentication works:
+Flower has built-in support for authenticated SuperNodes that you can use to verify the
+identities of each SuperNode connecting to a SuperLink. Flower node authentication works
+similarly to how GitHub SSH authentication works:

-* SuperLink (server) stores a list of known (client) node public keys
-* Using ECDH, both SuperNode and SuperLink independently derive a shared secret
-* Shared secret is used to compute the HMAC value of the message sent from SuperNode to SuperLink as a token
-* SuperLink verifies the token
+- SuperLink (server) stores a list of known (client) node public keys
+- Using ECDH, both SuperNode and SuperLink independently derive a shared secret
+- Shared secret is used to compute the HMAC value of the message sent from SuperNode to
+  SuperLink as a token
+- SuperLink verifies the token

-We recommend you to check out the complete `code example `_ demonstrating federated learning with Flower in an authenticated setting.
+We recommend checking out the complete `code example
+`_
+demonstrating federated learning with Flower in an authenticated setting.

..
note::
+
+    This guide covers a preview feature that might change in future versions of Flower.

 .. note::
-    For increased security, node authentication can only be used when encrypted connections (SSL/TLS) are enabled.

-Enable node authentication in :code:`SuperLink`
------------------------------------------------
+    For increased security, node authentication can only be used when encrypted
+    connections (SSL/TLS) are enabled.
+
+Enable node authentication in ``SuperLink``
+-------------------------------------------

-To enable node authentication, first you need to configure SSL/TLS connections to secure the SuperLink<>SuperNode communication. You can find the complete guide
-`here `_.
-After configuring secure connections, you can enable client authentication in a long-running Flower :code:`SuperLink`.
-Use the following terminal command to start a Flower :code:`SuperNode` that has both secure connections and node authentication enabled:
+To enable node authentication, first you need to configure SSL/TLS connections to secure
+the SuperLink<>SuperNode communication. You can find the complete guide `here
+`_. After
+configuring secure connections, you can enable client authentication in a long-running
+Flower ``SuperLink``. Use the following terminal command to start a Flower ``SuperLink``
+that has both secure connections and node authentication enabled:

 .. code-block:: bash

@@ -37,43 +46,59 @@ Use the following terminal command to start a Flower :code:`SuperNode` that has

 Let's break down the authentication flags:

-1. The first flag :code:`--auth-list-public-keys` expects a path to a CSV file storing all known node public keys. You need to store all known node public keys that are allowed to participate in a federation in one CSV file (:code:`.csv`).
+1. The first flag ``--auth-list-public-keys`` expects a path to a CSV file storing all
+   known node public keys. You need to store all known node public keys that are allowed
+   to participate in a federation in one CSV file (``.csv``).

-   A valid CSV file storing known node public keys should list the keys in OpenSSH format, separated by commas and without any comments. For an example, refer to our code sample, which contains a CSV file with two known node public keys.
+       A valid CSV file storing known node public keys should list the keys in OpenSSH
+       format, separated by commas and without any comments. For an example, refer to
+       our code sample, which contains a CSV file with two known node public keys.

-2. The second and third flags :code:`--auth-superlink-private-key` and :code:`--auth-superlink-public-key` expect paths to the server's private and public keys. For development purposes, you can generate a private and public key pair using :code:`ssh-keygen -t ecdsa -b 384`.
+2. The second and third flags ``--auth-superlink-private-key`` and
+   ``--auth-superlink-public-key`` expect paths to the server's private and public keys.
+   For development purposes, you can generate a private and public key pair using
+   ``ssh-keygen -t ecdsa -b 384``.

 .. note::
-    In Flower 1.9, there is no support for dynamically removing, editing, or adding known node public keys to the SuperLink.
-    To change the set of known nodes, you need to shut the server down, edit the CSV file, and start the server again.
-    Support for dynamically changing the set of known nodes is on the roadmap to be released in Flower 1.10 (ETA: June).
+
+    In Flower 1.9, there is no support for dynamically removing, editing, or adding
+    known node public keys to the SuperLink.
To change the set of known nodes, you need + to shut the server down, edit the CSV file, and start the server again. Support for + dynamically changing the set of known nodes is on the roadmap to be released in + Flower 1.10 (ETA: June). -Enable node authentication in :code:`SuperNode` -------------------------------------------------- +Enable node authentication in ``SuperNode`` +------------------------------------------- -Similar to the long-running Flower server (:code:`SuperLink`), you can easily enable node authentication in the long-running Flower client (:code:`SuperNode`). -Use the following terminal command to start an authenticated :code:`SuperNode`: +Similar to the long-running Flower server (``SuperLink``), you can easily enable node +authentication in the long-running Flower client (``SuperNode``). Use the following +terminal command to start an authenticated ``SuperNode``: .. code-block:: bash - flower-supernode - --root-certificates certificates/ca.crt - --superlink 127.0.0.1:9092 - --auth-supernode-private-key keys/client_credentials - --auth-supernode-public-key keys/client_credentials.pub - -The :code:`--auth-supernode-private-key` flag expects a path to the node's private key file and the :code:`--auth-supernode-public-key` flag expects a path to the node's public key file. For development purposes, you can generate a private and public key pair using :code:`ssh-keygen -t ecdsa -b 384`. + flower-supernode + --root-certificates certificates/ca.crt + --superlink 127.0.0.1:9092 + --auth-supernode-private-key keys/client_credentials + --auth-supernode-public-key keys/client_credentials.pub +The ``--auth-supernode-private-key`` flag expects a path to the node's private key file +and the ``--auth-supernode-public-key`` flag expects a path to the node's public key +file. For development purposes, you can generate a private and public key pair using +``ssh-keygen -t ecdsa -b 384``. Security notice --------------- -The system's security relies on the credentials of the SuperLink and each SuperNode. Therefore, it is imperative to safeguard and safely store the credentials to avoid security risks such as Public Key Infrastructure (PKI) impersonation attacks. -The node authentication mechanism also involves human interaction, so please ensure that all of the communication is done in a secure manner, using trusted communication methods. - +The system's security relies on the credentials of the SuperLink and each SuperNode. +Therefore, it is imperative to safeguard and safely store the credentials to avoid +security risks such as Public Key Infrastructure (PKI) impersonation attacks. The node +authentication mechanism also involves human interaction, so please ensure that all of +the communication is done in a secure manner, using trusted communication methods. Conclusion ---------- -You should now have learned how to start a long-running Flower server (:code:`SuperLink`) and client (:code:`SuperNode`) with node authentication enabled. You should also know the significance of the private key and store it safely to minimize security risks. +You should now have learned how to start a long-running Flower server (``SuperLink``) +and client (``SuperNode``) with node authentication enabled. You should also know the +significance of the private key and store it safely to minimize security risks. 
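+
+For reference, the same kind of ECDSA P-384 key pair that ``ssh-keygen -t ecdsa -b 384``
+produces can also be generated programmatically. The following sketch is illustrative
+rather than part of Flower itself; it assumes the third-party ``cryptography`` package
+is installed, and the file names are hypothetical:
+
+.. code-block:: python
+
+    from cryptography.hazmat.primitives import serialization
+    from cryptography.hazmat.primitives.asymmetric import ec
+
+    # Generate an ECDSA key pair on the P-384 curve
+    # (the same curve as `ssh-keygen -t ecdsa -b 384`)
+    private_key = ec.generate_private_key(ec.SECP384R1())
+
+    # Serialize the private key in OpenSSH format without a passphrase
+    private_bytes = private_key.private_bytes(
+        encoding=serialization.Encoding.PEM,
+        format=serialization.PrivateFormat.OpenSSH,
+        encryption_algorithm=serialization.NoEncryption(),
+    )
+
+    # Serialize the matching public key in OpenSSH format
+    public_bytes = private_key.public_key().public_bytes(
+        encoding=serialization.Encoding.OpenSSH,
+        format=serialization.PublicFormat.OpenSSH,
+    )
+
+    # Hypothetical file names, matching the flags shown above
+    with open("keys/client_credentials", "wb") as f:
+        f.write(private_bytes)
+    with open("keys/client_credentials.pub", "wb") as f:
+        f.write(public_bytes)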
diff --git a/doc/source/how-to-configure-clients.rst b/doc/source/how-to-configure-clients.rst index ff0a2f4033df..c950ab3be9e7 100644 --- a/doc/source/how-to-configure-clients.rst +++ b/doc/source/how-to-configure-clients.rst @@ -1,37 +1,55 @@ Configure clients ================= -Along with model parameters, Flower can send configuration values to clients. Configuration values can be used for various purposes. They are, for example, a popular way to control client-side hyperparameters from the server. +Along with model parameters, Flower can send configuration values to clients. +Configuration values can be used for various purposes. They are, for example, a popular +way to control client-side hyperparameters from the server. Configuration values -------------------- -Configuration values are represented as a dictionary with ``str`` keys and values of type ``bool``, ``bytes``, ``double`` (64-bit precision float), ``int``, or ``str`` (or equivalent types in different languages). Here is an example of a configuration dictionary in Python: +Configuration values are represented as a dictionary with ``str`` keys and values of +type ``bool``, ``bytes``, ``double`` (64-bit precision float), ``int``, or ``str`` (or +equivalent types in different languages). Here is an example of a configuration +dictionary in Python: .. code-block:: python config_dict = { - "dropout": True, # str key, bool value + "dropout": True, # str key, bool value "learning_rate": 0.01, # str key, float value - "batch_size": 32, # str key, int value - "optimizer": "sgd", # str key, str value + "batch_size": 32, # str key, int value + "optimizer": "sgd", # str key, str value } -Flower serializes these configuration dictionaries (or *config dict* for short) to their ProtoBuf representation, transports them to the client using gRPC, and then deserializes them back to Python dictionaries. +Flower serializes these configuration dictionaries (or *config dict* for short) to their +ProtoBuf representation, transports them to the client using gRPC, and then deserializes +them back to Python dictionaries. .. note:: - Currently, there is no support for directly sending collection types (e.g., ``Set``, ``List``, ``Map``) as values in configuration dictionaries. There are several workarounds to send collections as values by converting them to one of the supported value types (and converting them back on the client-side). - - One can, for example, convert a list of floating-point numbers to a JSON string, then send the JSON string using the configuration dictionary, and then convert the JSON string back to a list of floating-point numbers on the client. + Currently, there is no support for directly sending collection types (e.g., ``Set``, + ``List``, ``Map``) as values in configuration dictionaries. There are several + workarounds to send collections as values by converting them to one of the supported + value types (and converting them back on the client-side). + One can, for example, convert a list of floating-point numbers to a JSON string, + then send the JSON string using the configuration dictionary, and then convert the + JSON string back to a list of floating-point numbers on the client. Configuration through built-in strategies ----------------------------------------- -The easiest way to send configuration values to clients is to use a built-in strategy like :code:`FedAvg`. Built-in strategies support so-called configuration functions. 
A configuration function is a function that the built-in strategy calls to get the configuration dictionary for the current round. It then forwards the configuration dictionary to all the clients selected during that round.
+The easiest way to send configuration values to clients is to use a built-in strategy
+like ``FedAvg``. Built-in strategies support so-called configuration functions. A
+configuration function is a function that the built-in strategy calls to get the
+configuration dictionary for the current round. It then forwards the configuration
+dictionary to all the clients selected during that round.

-Let's start with a simple example. Imagine we want to send (a) the batch size that the client should use, (b) the current global round of federated learning, and (c) the number of epochs to train on the client-side. Our configuration function could look like this:
+Let's start with a simple example. Imagine we want to send (a) the batch size that the
+client should use, (b) the current global round of federated learning, and (c) the
+number of epochs to train on the client-side. Our configuration function could look like
+this:

 .. code-block:: python

@@ -44,12 +62,13 @@ Let's start with a simple example. Imagine we want to send (a) the batch size th
         }
         return config

-To make the built-in strategies use this function, we can pass it to ``FedAvg`` during initialization using the parameter :code:`on_fit_config_fn`:
+To make the built-in strategies use this function, we can pass it to ``FedAvg`` during
+initialization using the parameter ``on_fit_config_fn``:

 .. code-block:: python

     strategy = FedAvg(
-        ...,  # Other FedAvg parameters
+        ...,  # Other FedAvg parameters
         on_fit_config_fn=fit_config,  # The fit_config function we defined earlier
     )

@@ -64,9 +83,15 @@ On the client side, we receive the configuration dictionary in ``fit``:
         print(config["local_epochs"])  # Prints `2`
         # ... (rest of `fit` method)

-There is also an `on_evaluate_config_fn` to configure evaluation, which works the same way. They are separate functions because one might want to send different configuration values to `evaluate` (for example, to use a different batch size).
+There is also an ``on_evaluate_config_fn`` to configure evaluation, which works the same
+way. They are separate functions because one might want to send different configuration
+values to ``evaluate`` (for example, to use a different batch size).

-The built-in strategies call this function every round (that is, every time `Strategy.configure_fit` or `Strategy.configure_evaluate` runs). Calling `on_evaluate_config_fn` every round allows us to vary/change the config dict over consecutive rounds. If we wanted to implement a hyperparameter schedule, for example, to increase the number of local epochs during later rounds, we could do the following:
+The built-in strategies call this function every round (that is, every time
+``Strategy.configure_fit`` or ``Strategy.configure_evaluate`` runs). Calling
+``on_evaluate_config_fn`` every round allows us to vary/change the config dict over
+consecutive rounds. If we wanted to implement a hyperparameter schedule, for example, to
+increase the number of local epochs during later rounds, we could do the following:

 .. code-block:: python

@@ -79,14 +104,19 @@ The built-in strategies call this function every round (that is, every time `Str
         }
         return config

-The :code:`FedAvg` strategy will call this function *every round*.
+The ``FedAvg`` strategy will call this function *every round*.

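+The evaluation-side counterpart can be wired up the same way. As a minimal sketch (the
+``evaluate_config`` name and the ``val_steps`` key are illustrative, not prescribed by
+Flower), a separate function is passed via ``on_evaluate_config_fn``:
+
+.. code-block:: python
+
+    def evaluate_config(server_round: int):
+        """Return an evaluation configuration dict for each round."""
+        # Use more validation steps in later rounds (an arbitrary schedule)
+        return {"val_steps": 5 if server_round < 4 else 10}
+
+
+    strategy = FedAvg(
+        ...,  # Other FedAvg parameters
+        on_fit_config_fn=fit_config,  # Configures training
+        on_evaluate_config_fn=evaluate_config,  # Configures evaluation
+    )
+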
Configuring individual clients
------------------------------

-In some cases, it is necessary to send different configuration values to different clients.
+In some cases, it is necessary to send different configuration values to different
+clients.

-This can be achieved by customizing an existing strategy or by :doc:`implementing a custom strategy from scratch `. Here's a nonsensical example that customizes :code:`FedAvg` by adding a custom ``"hello": "world"`` configuration key/value pair to the config dict of a *single client* (only the first client in the list, the other clients in this round to not receive this "special" config value):
+This can be achieved by customizing an existing strategy or by :doc:`implementing a
+custom strategy from scratch `. Here's a nonsensical example
+that customizes ``FedAvg`` by adding a custom ``"hello": "world"`` configuration
+key/value pair to the config dict of a *single client* (only the first client in the
+list; the other clients in this round do not receive this "special" config value):

 .. code-block:: python

@@ -94,7 +124,9 @@ This can be achieved by customizing an existing strategy or by :doc:`implementin
         def configure_fit(
             self, server_round: int, parameters: Parameters, client_manager: ClientManager
         ) -> List[Tuple[ClientProxy, FitIns]]:
-            client_instructions = super().configure_fit(server_round, parameters, client_manager)
+            client_instructions = super().configure_fit(
+                server_round, parameters, client_manager
+            )

             # Add special "hello": "world" config key/value pair,
             # but only to the first client in the list
@@ -103,6 +135,7 @@ This can be achieved by customizing an existing strategy or by :doc:`implementin

         return client_instructions

+
     # Create strategy and run server
     strategy = CustomClientConfigStrategy(
         # ... (same arguments as plain FedAvg here)
diff --git a/doc/source/how-to-configure-logging.rst b/doc/source/how-to-configure-logging.rst
index d5559429a73c..bb7461390b42 100644
--- a/doc/source/how-to-configure-logging.rst
+++ b/doc/source/how-to-configure-logging.rst
@@ -1,17 +1,19 @@
 Configure logging
 =================

-The Flower logger keeps track of all core events that take place in federated learning workloads.
-It presents information by default following a standard message format:
+The Flower logger keeps track of all core events that take place in federated learning
+workloads. It presents information by default following a standard message format:

 .. code-block:: python

     DEFAULT_FORMATTER = logging.Formatter(
-        "%(levelname)s %(name)s %(asctime)s | %(filename)s:%(lineno)d | %(message)s"
+        "%(levelname)s %(name)s %(asctime)s | %(filename)s:%(lineno)d | %(message)s"
     )

-containing relevant information including: log message level (e.g. :code:`INFO`, :code:`DEBUG`), a timestamp, the line where the logging took place from, as well as the log message itself.
-In this way, the logger would typically display information on your terminal as follows:
+containing relevant information including: log message level (e.g. ``INFO``, ``DEBUG``),
+a timestamp, the line from which the logging took place, as well as the log message
+itself. In this way, the logger would typically display information on your terminal as
+follows:

 .. code-block:: bash

@@ -29,29 +31,35 @@ In this way, the logger would typically display information on your terminal as
     INFO flwr 2023-07-15 15:32:36,118 | server.py:125 | fit progress: (5, 358.6936808824539, {'accuracy': 0.3467}, 18.964264554999318)
     ...
-
 Saving log to file
--------------------
+------------------

-By default, the Flower log is outputted to the terminal where you launch your Federated Learning workload from. This applies for both gRPC-based federation (i.e. when you do :code:`fl.server.start_server`) and when using the :code:`VirtualClientEngine` (i.e. when you do :code:`fl.simulation.start_simulation`).
-In some situations you might want to save this log to disk. You can do so by calling the `fl.common.logger.configure() `_ function. For example:
+By default, the Flower log is outputted to the terminal where you launch your Federated
+Learning workload from. This applies to both gRPC-based federation (i.e. when you do
+``fl.server.start_server``) and when using the ``VirtualClientEngine`` (i.e. when you do
+``fl.simulation.start_simulation``). In some situations you might want to save this log
+to disk. You can do so by calling the `fl.common.logger.configure()
+`_ function. For
+example:

 .. code-block:: python
-    
-    import flwr as fl
-    
-    ...

-    # in your main file and before launching your experiment
-    # add an identifier to your logger
-    # then specify the name of the file where the log should be outputted to
-    fl.common.logger.configure(identifier="myFlowerExperiment", filename="log.txt")
+    import flwr as fl
+
+    ...

-    # then start your workload
-    fl.simulation.start_simulation(...)  # or fl.server.start_server(...)
+    # in your main file and before launching your experiment
+    # add an identifier to your logger
+    # then specify the name of the file where the log should be outputted to
+    fl.common.logger.configure(identifier="myFlowerExperiment", filename="log.txt")

-With the above, Flower will record the log you see on your terminal to :code:`log.txt`. This file will be created in the same directory as were you are running the code from.
-If we inspect we see the log above is also recorded but prefixing with :code:`identifier` each line:
+    # then start your workload
+    fl.simulation.start_simulation(...)  # or fl.server.start_server(...)
+
+With the above, Flower will record the log you see on your terminal to ``log.txt``. This
+file will be created in the same directory where you are running the code from. If we
+inspect it, we see that the log above is also recorded, but with each line prefixed by
+``identifier``:

 .. code-block:: bash

@@ -69,12 +77,11 @@ If we inspect we see the log above is also recorded but prefixing with :code:`id
     myFlowerExperiment | INFO flwr 2023-07-15 15:32:36,118 | server.py:125 | fit progress: (5, 358.6936808824539, {'accuracy': 0.3467}, 18.964264554999318)
     ...

-
 Log your own messages
 ---------------------

-You might expand the information shown by default with the Flower logger by adding more messages relevant to your application.
-You can achieve this easily as follows.
+You might expand the information shown by default with the Flower logger by adding more
+messages relevant to your application. You can achieve this easily as follows.

 .. code-block:: python

@@ -84,25 +91,31 @@ You can achieve this easily as follows.

     # For example, let's say you want to add to the log some info about the training on your client for debugging purposes

+
    class FlowerClient(fl.client.NumPyClient):
-        def __init__(self, cid: int ...):
+        def __init__(
+            self,
+            cid: int,
+            # ...
+        ):
            self.cid = cid
-            self.net = ...
-            ...
+            self.net = net
+            # ...
def fit(self, parameters, config):
            log(INFO, f"Printing a custom INFO message at the start of fit() :)")
-            
+
            set_params(self.net, parameters)

            log(DEBUG, f"Client {self.cid} is doing fit() with config: {config}")

-            ...
+            # ...

-In this way your logger will show, in addition to the default messages, the ones introduced by the clients as specified above.
+In this way your logger will show, in addition to the default messages, the ones
+introduced by the clients as specified above.

 .. code-block:: bash
-    
+
     ...
     INFO flwr 2023-07-15 16:18:21,726 | server.py:89 | Initializing global parameters
     INFO flwr 2023-07-15 16:18:21,726 | server.py:276 | Requesting initial parameters from one random client
@@ -123,10 +136,13 @@ In this way your logger will show, in addition to the default messages, the ones
     DEBUG flwr 2023-07-15 16:18:28,617 | main.py:63 | Client 13 is doing fit() with config: {'epochs': 5, 'batch_size': 64}
     ...

-
 Log to a remote service
 -----------------------

-The :code:`fl.common.logger.configure` function, also allows specifying a host to which logs can be pushed (via :code:`POST`) through a native Python :code:`logging.handler.HTTPHandler`.
-This is a particularly useful feature in :code:`gRPC`-based Federated Learning workloads where otherwise gathering logs from all entities (i.e. the server and the clients) might be cumbersome.
-Note that in Flower simulation, the server automatically displays all logs. You can still specify a :code:`HTTPHandler` should you wish to backup or analyze the logs somewhere else.
+The ``fl.common.logger.configure`` function also allows specifying a host to which logs
+can be pushed (via ``POST``) through a native Python ``logging.handlers.HTTPHandler``.
+This is a particularly useful feature in ``gRPC``-based Federated Learning workloads
+where otherwise gathering logs from all entities (i.e. the server and the clients) might
+be cumbersome. Note that in Flower simulation, the server automatically displays all
+logs. You can still specify an ``HTTPHandler`` should you wish to back up or analyze the
+logs somewhere else.
diff --git a/doc/source/how-to-enable-ssl-connections.rst b/doc/source/how-to-enable-ssl-connections.rst
index fc8e89914ac2..cd8590bc3436 100644
--- a/doc/source/how-to-enable-ssl-connections.rst
+++ b/doc/source/how-to-enable-ssl-connections.rst
@@ -1,44 +1,46 @@
 Enable SSL connections
 ======================

-This guide describes how to a SSL-enabled secure Flower server (:code:`SuperLink`) can be started and
-how a Flower client (:code:`SuperNode`) can establish a secure connections to it.
+This guide describes how an SSL-enabled secure Flower server (``SuperLink``) can be
+started and how a Flower client (``SuperNode``) can establish a secure connection to
+it.

-A complete code example demonstrating a secure connection can be found
-`here `_.
-
-The code example comes with a :code:`README.md` file which explains how to start it. Although it is
-already SSL-enabled, it might be less descriptive on how it does so. Stick to this guide for a deeper
-introduction to the topic.
+A complete code example demonstrating a secure connection can be found `here
+`_.
+The code example comes with a ``README.md`` file which explains how to start it.
+Although it is already SSL-enabled, it might be less descriptive on how it does so.
+Stick to this guide for a deeper introduction to the topic.

 Certificates
 ------------

-Using SSL-enabled connections requires certificates to be passed to the server and client.
For
-the purpose of this guide we are going to generate self-signed certificates. As this can become
-quite complex we are going to ask you to run the script in
-:code:`examples/advanced-tensorflow/certificates/generate.sh`
-with the following command sequence:
+Using SSL-enabled connections requires certificates to be passed to the server and
+client. For the purpose of this guide we are going to generate self-signed certificates.
+As this can become quite complex, we are going to ask you to run the script in
+``examples/advanced-tensorflow/certificates/generate.sh`` with the following command
+sequence:

 .. code-block:: bash

-  cd examples/advanced-tensorflow/certificates
-  ./generate.sh
-
-This will generate the certificates in :code:`examples/advanced-tensorflow/.cache/certificates`.
+    cd examples/advanced-tensorflow/certificates
+    ./generate.sh

-The approach for generating SSL certificates in the context of this example can serve as an inspiration and
-starting point, but it should not be used as a reference for production environments. Please refer to other
-sources regarding the issue of correctly generating certificates for production environments.
-For non-critical prototyping or research projects, it might be sufficient to use the self-signed certificates generated using
-the scripts mentioned in this guide.
+This will generate the certificates in
+``examples/advanced-tensorflow/.cache/certificates``.

+The approach for generating SSL certificates in the context of this example can serve as
+an inspiration and starting point, but it should not be used as a reference for
+production environments. Please refer to other sources regarding the issue of correctly
+generating certificates for production environments. For non-critical prototyping or
+research projects, it might be sufficient to use the self-signed certificates generated
+using the scripts mentioned in this guide.

 Server (SuperLink)
 ------------------

-Use the following terminal command to start a sever (SuperLink) that uses the previously generated certificates:
+Use the following terminal command to start a server (SuperLink) that uses the previously
+generated certificates:

 .. code-block:: bash

@@ -47,34 +49,36 @@ Use the following terminal command to start a server (SuperLink) that uses the pr
         --ssl-certfile certificates/server.pem
         --ssl-keyfile certificates/server.key

-When providing certificates, the server expects a tuple of three certificates paths: CA certificate, server certificate and server private key.
-
+When providing certificates, the server expects a tuple of three certificate paths: CA
+certificate, server certificate and server private key.

 Client (SuperNode)
 ------------------

-Use the following terminal command to start a client (SuperNode) that uses the previously generated certificates:
+Use the following terminal command to start a client (SuperNode) that uses the
+previously generated certificates:

 .. code-block:: bash

-    flower-supernode
-    --root-certificates certificates/ca.crt
-    --superlink 127.0.0.1:9092
-
-When setting :code:`root_certificates`, the client expects a file path to PEM-encoded root certificates.
+    flower-supernode
+        --root-certificates certificates/ca.crt
+        --superlink 127.0.0.1:9092
+
+When setting ``root_certificates``, the client expects a file path to PEM-encoded root
+certificates.

 Conclusion
 ----------

-You should now have learned how to generate self-signed certificates using the given script, start an
-SSL-enabled server and have a client establish a secure connection to it.
-
+
+You should now have learned how to generate self-signed certificates using the given
+script, start an SSL-enabled server and have a client establish a secure connection to
+it.

 Additional resources
 --------------------

-These additional sources might be relevant if you would like to dive deeper into the topic of certificates:
+These additional sources might be relevant if you would like to dive deeper into the
+topic of certificates:

-* `Let's Encrypt `_
-* `certbot `_
+- `Let's Encrypt `_
+- `certbot `_
diff --git a/doc/source/how-to-implement-strategies.rst b/doc/source/how-to-implement-strategies.rst
index 01bbb3042973..075d8a0116c4 100644
--- a/doc/source/how-to-implement-strategies.rst
+++ b/doc/source/how-to-implement-strategies.rst
@@ -1,22 +1,21 @@
 Implement strategies
 ====================

-The strategy abstraction enables implementation of fully custom strategies. A
-strategy is basically the federated learning algorithm that runs on the server.
-Strategies decide how to sample clients, how to configure clients for training,
-how to aggregate updates, and how to evaluate models. Flower provides a few
-built-in strategies which are based on the same API described below.
+The strategy abstraction enables implementation of fully custom strategies. A strategy
+is basically the federated learning algorithm that runs on the server. Strategies decide
+how to sample clients, how to configure clients for training, how to aggregate updates,
+and how to evaluate models. Flower provides a few built-in strategies which are based on
+the same API described below.

-The :code:`Strategy` abstraction
---------------------------------
+The ``Strategy`` abstraction
+----------------------------

 All strategy implementations are derived from the abstract base class
-:code:`flwr.server.strategy.Strategy`, both built-in implementations and third
-party implementations. This means that custom strategy implementations have the
-exact same capabilities at their disposal as built-in ones.
+``flwr.server.strategy.Strategy``, both built-in implementations and third party
+implementations. This means that custom strategy implementations have the exact same
+capabilities at their disposal as built-in ones.

-The strategy abstraction defines a few abstract methods that need to be
-implemented:
+The strategy abstraction defines a few abstract methods that need to be implemented:

 .. code-block:: python

@@ -31,10 +30,7 @@ implemented:

         @abstractmethod
         def configure_fit(
-            self,
-            server_round: int,
-            parameters: Parameters,
-            client_manager: ClientManager
+            self, server_round: int, parameters: Parameters, client_manager: ClientManager
         ) -> List[Tuple[ClientProxy, FitIns]]:
             """Configure the next round of training."""

@@ -49,10 +45,7 @@ implemented:

         @abstractmethod
         def configure_evaluate(
-            self,
-            server_round: int,
-            parameters: Parameters,
-            client_manager: ClientManager
+            self, server_round: int, parameters: Parameters, client_manager: ClientManager
         ) -> List[Tuple[ClientProxy, EvaluateIns]]:
             """Configure the next round of evaluation."""

@@ -71,31 +64,35 @@ implemented:
         ) -> Optional[Tuple[float, Dict[str, Scalar]]]:
             """Evaluate the current model parameters."""

-
-Creating a new strategy means implementing a new :code:`class` (derived from the
-abstract base class :code:`Strategy`) that implements for the previously shown
-abstract methods:
+Creating a new strategy means implementing a new ``class`` (derived from the abstract
+base class ``Strategy``) that implements the previously shown abstract methods:

..
code-block:: python class SotaStrategy(Strategy): def initialize_parameters(self, client_manager): # Your implementation here + pass def configure_fit(self, server_round, parameters, client_manager): # Your implementation here + pass def aggregate_fit(self, server_round, results, failures): # Your implementation here + pass def configure_evaluate(self, server_round, parameters, client_manager): # Your implementation here + pass def aggregate_evaluate(self, server_round, results, failures): # Your implementation here + pass def evaluate(self, parameters): # Your implementation here + pass The Flower server calls these methods in the following order: @@ -176,12 +173,15 @@ The Flower server calls these methods in the following order: The following sections describe each of those methods in more detail. -The :code:`initialize_parameters` method ----------------------------------------- +The ``initialize_parameters`` method +------------------------------------ -:code:`initialize_parameters` is called only once, at the very beginning of an execution. It is responsible for providing the initial global model parameters in a serialized form (i.e., as a :code:`Parameters` object). +``initialize_parameters`` is called only once, at the very beginning of an execution. It +is responsible for providing the initial global model parameters in a serialized form +(i.e., as a ``Parameters`` object). -Built-in strategies return user-provided initial parameters. The following example shows how initial parameters can be passed to :code:`FedAvg`: +Built-in strategies return user-provided initial parameters. The following example shows +how initial parameters can be passed to ``FedAvg``: .. code-block:: python @@ -200,49 +200,68 @@ Built-in strategies return user-provided initial parameters. The following examp # Serialize ndarrays to `Parameters` parameters = fl.common.ndarrays_to_parameters(weights) - # Use the serialized parameters as the initial global parameters + # Use the serialized parameters as the initial global parameters strategy = fl.server.strategy.FedAvg( initial_parameters=parameters, ) fl.server.start_server(config=fl.server.ServerConfig(num_rounds=3), strategy=strategy) -The Flower server will call :code:`initialize_parameters`, which either returns the parameters that were passed to :code:`initial_parameters`, or :code:`None`. If no parameters are returned from :code:`initialize_parameters` (i.e., :code:`None`), the server will randomly select one client and ask it to provide its parameters. This is a convenience feature and not recommended in practice, but it can be useful for prototyping. In practice, it is recommended to always use server-side parameter initialization. +The Flower server will call ``initialize_parameters``, which either returns the +parameters that were passed to ``initial_parameters``, or ``None``. If no parameters are +returned from ``initialize_parameters`` (i.e., ``None``), the server will randomly +select one client and ask it to provide its parameters. This is a convenience feature +and not recommended in practice, but it can be useful for prototyping. In practice, it +is recommended to always use server-side parameter initialization. .. note:: - Server-side parameter initialization is a powerful mechanism. It can be used, for example, to resume training from a previously saved checkpoint. It is also the fundamental capability needed to implement hybrid approaches, for example, to fine-tune a pre-trained model using federated learning. 
+ Server-side parameter initialization is a powerful mechanism. It can be used, for + example, to resume training from a previously saved checkpoint. It is also the + fundamental capability needed to implement hybrid approaches, for example, to + fine-tune a pre-trained model using federated learning. -The :code:`configure_fit` method --------------------------------- +The ``configure_fit`` method +---------------------------- -:code:`configure_fit` is responsible for configuring the upcoming round of training. What does *configure* mean in this context? Configuring a round means selecting clients and deciding what instructions to send to these clients. The signature of :code:`configure_fit` makes this clear: +``configure_fit`` is responsible for configuring the upcoming round of training. What +does *configure* mean in this context? Configuring a round means selecting clients and +deciding what instructions to send to these clients. The signature of ``configure_fit`` +makes this clear: .. code-block:: python @abstractmethod def configure_fit( - self, - server_round: int, - parameters: Parameters, - client_manager: ClientManager + self, server_round: int, parameters: Parameters, client_manager: ClientManager ) -> List[Tuple[ClientProxy, FitIns]]: """Configure the next round of training.""" -The return value is a list of tuples, each representing the instructions that will be sent to a particular client. Strategy implementations usually perform the following steps in :code:`configure_fit`: +The return value is a list of tuples, each representing the instructions that will be +sent to a particular client. Strategy implementations usually perform the following +steps in ``configure_fit``: -* Use the :code:`client_manager` to randomly sample all (or a subset of) available clients (each represented as a :code:`ClientProxy` object) -* Pair each :code:`ClientProxy` with the same :code:`FitIns` holding the current global model :code:`parameters` and :code:`config` dict +- Use the ``client_manager`` to randomly sample all (or a subset of) available clients + (each represented as a ``ClientProxy`` object) +- Pair each ``ClientProxy`` with the same ``FitIns`` holding the current global model + ``parameters`` and ``config`` dict -More sophisticated implementations can use :code:`configure_fit` to implement custom client selection logic. A client will only participate in a round if the corresponding :code:`ClientProxy` is included in the list returned from :code:`configure_fit`. +More sophisticated implementations can use ``configure_fit`` to implement custom client +selection logic. A client will only participate in a round if the corresponding +``ClientProxy`` is included in the list returned from ``configure_fit``. .. note:: - The structure of this return value provides a lot of flexibility to the user. Since instructions are defined on a per-client basis, different instructions can be sent to each client. This enables custom strategies to train, for example, different models on different clients, or use different hyperparameters on different clients (via the :code:`config` dict). + The structure of this return value provides a lot of flexibility to the user. Since + instructions are defined on a per-client basis, different instructions can be sent + to each client. This enables custom strategies to train, for example, different + models on different clients, or use different hyperparameters on different clients + (via the ``config`` dict). 
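+
+As a rough sketch of those two steps (sample, then pair), and leaving all the other
+abstract methods aside, a bare-bones ``configure_fit`` could look like the following;
+the sample size of ``10`` is arbitrary:
+
+.. code-block:: python
+
+    from flwr.common import FitIns
+
+
+    class MinimalStrategy(Strategy):
+        def configure_fit(self, server_round, parameters, client_manager):
+            # One set of instructions built from the current global parameters
+            fit_ins = FitIns(parameters, {"server_round": server_round})
+
+            # Step 1: sample available clients via the client manager
+            clients = client_manager.sample(num_clients=10, min_num_clients=10)
+
+            # Step 2: pair every sampled client with the same instructions
+            return [(client, fit_ins) for client in clients]
+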
-The :code:`aggregate_fit` method --------------------------------- +The ``aggregate_fit`` method +---------------------------- -:code:`aggregate_fit` is responsible for aggregating the results returned by the clients that were selected and asked to train in :code:`configure_fit`. +``aggregate_fit`` is responsible for aggregating the results returned by the clients +that were selected and asked to train in ``configure_fit``. .. code-block:: python @@ -255,42 +274,58 @@ The :code:`aggregate_fit` method ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]: """Aggregate training results.""" -Of course, failures can happen, so there is no guarantee that the server will get results from all the clients it sent instructions to (via :code:`configure_fit`). :code:`aggregate_fit` therefore receives a list of :code:`results`, but also a list of :code:`failures`. +Of course, failures can happen, so there is no guarantee that the server will get +results from all the clients it sent instructions to (via ``configure_fit``). +``aggregate_fit`` therefore receives a list of ``results``, but also a list of +``failures``. -:code:`aggregate_fit` returns an optional :code:`Parameters` object and a dictionary of aggregated metrics. The :code:`Parameters` return value is optional because :code:`aggregate_fit` might decide that the results provided are not sufficient for aggregation (e.g., too many failures). +``aggregate_fit`` returns an optional ``Parameters`` object and a dictionary of +aggregated metrics. The ``Parameters`` return value is optional because +``aggregate_fit`` might decide that the results provided are not sufficient for +aggregation (e.g., too many failures). -The :code:`configure_evaluate` method -------------------------------------- +The ``configure_evaluate`` method +--------------------------------- -:code:`configure_evaluate` is responsible for configuring the upcoming round of evaluation. What does *configure* mean in this context? Configuring a round means selecting clients and deciding what instructions to send to these clients. The signature of :code:`configure_evaluate` makes this clear: +``configure_evaluate`` is responsible for configuring the upcoming round of evaluation. +What does *configure* mean in this context? Configuring a round means selecting clients +and deciding what instructions to send to these clients. The signature of +``configure_evaluate`` makes this clear: .. code-block:: python @abstractmethod def configure_evaluate( - self, - server_round: int, - parameters: Parameters, - client_manager: ClientManager + self, server_round: int, parameters: Parameters, client_manager: ClientManager ) -> List[Tuple[ClientProxy, EvaluateIns]]: """Configure the next round of evaluation.""" -The return value is a list of tuples, each representing the instructions that will be sent to a particular client. Strategy implementations usually perform the following steps in :code:`configure_evaluate`: +The return value is a list of tuples, each representing the instructions that will be +sent to a particular client. 
Strategy implementations usually perform the following +steps in ``configure_evaluate``: -* Use the :code:`client_manager` to randomly sample all (or a subset of) available clients (each represented as a :code:`ClientProxy` object) -* Pair each :code:`ClientProxy` with the same :code:`EvaluateIns` holding the current global model :code:`parameters` and :code:`config` dict +- Use the ``client_manager`` to randomly sample all (or a subset of) available clients + (each represented as a ``ClientProxy`` object) +- Pair each ``ClientProxy`` with the same ``EvaluateIns`` holding the current global + model ``parameters`` and ``config`` dict -More sophisticated implementations can use :code:`configure_evaluate` to implement custom client selection logic. A client will only participate in a round if the corresponding :code:`ClientProxy` is included in the list returned from :code:`configure_evaluate`. +More sophisticated implementations can use ``configure_evaluate`` to implement custom +client selection logic. A client will only participate in a round if the corresponding +``ClientProxy`` is included in the list returned from ``configure_evaluate``. .. note:: - The structure of this return value provides a lot of flexibility to the user. Since instructions are defined on a per-client basis, different instructions can be sent to each client. This enables custom strategies to evaluate, for example, different models on different clients, or use different hyperparameters on different clients (via the :code:`config` dict). - + The structure of this return value provides a lot of flexibility to the user. Since + instructions are defined on a per-client basis, different instructions can be sent + to each client. This enables custom strategies to evaluate, for example, different + models on different clients, or use different hyperparameters on different clients + (via the ``config`` dict). -The :code:`aggregate_evaluate` method -------------------------------------- +The ``aggregate_evaluate`` method +--------------------------------- -:code:`aggregate_evaluate` is responsible for aggregating the results returned by the clients that were selected and asked to evaluate in :code:`configure_evaluate`. +``aggregate_evaluate`` is responsible for aggregating the results returned by the +clients that were selected and asked to evaluate in ``configure_evaluate``. .. code-block:: python @@ -303,21 +338,29 @@ The :code:`aggregate_evaluate` method ) -> Tuple[Optional[float], Dict[str, Scalar]]: """Aggregate evaluation results.""" -Of course, failures can happen, so there is no guarantee that the server will get results from all the clients it sent instructions to (via :code:`configure_evaluate`). :code:`aggregate_evaluate` therefore receives a list of :code:`results`, but also a list of :code:`failures`. +Of course, failures can happen, so there is no guarantee that the server will get +results from all the clients it sent instructions to (via ``configure_evaluate``). +``aggregate_evaluate`` therefore receives a list of ``results``, but also a list of +``failures``. -:code:`aggregate_evaluate` returns an optional :code:`float` (loss) and a dictionary of aggregated metrics. The :code:`float` return value is optional because :code:`aggregate_evaluate` might decide that the results provided are not sufficient for aggregation (e.g., too many failures). +``aggregate_evaluate`` returns an optional ``float`` (loss) and a dictionary of +aggregated metrics. 
The ``float`` return value is optional because
+``aggregate_evaluate`` might decide that the results provided are not sufficient for
+aggregation (e.g., too many failures).
 
-The :code:`evaluate` method
----------------------------
+The ``evaluate`` method
+-----------------------
 
-:code:`evaluate` is responsible for evaluating model parameters on the server-side. Having :code:`evaluate` in addition to :code:`configure_evaluate`/:code:`aggregate_evaluate` enables strategies to perform both servers-side and client-side (federated) evaluation.
+``evaluate`` is responsible for evaluating model parameters on the server-side. Having
+``evaluate`` in addition to ``configure_evaluate``/``aggregate_evaluate`` enables
+strategies to perform both server-side and client-side (federated) evaluation.
 
 .. code-block:: python
 
     @abstractmethod
-    def evaluate(
-        self, parameters: Parameters
-    ) -> Optional[Tuple[float, Dict[str, Scalar]]]:
+    def evaluate(self, parameters: Parameters) -> Optional[Tuple[float, Dict[str, Scalar]]]:
         """Evaluate the current model parameters."""
 
-The return value is again optional because the strategy might not need to implement server-side evaluation or because the user-defined :code:`evaluate` method might not complete successfully (e.g., it might fail to load the server-side evaluation data).
+The return value is again optional because the strategy might not need to implement
+server-side evaluation or because the user-defined ``evaluate`` method might not
+complete successfully (e.g., it might fail to load the server-side evaluation data).
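+
+As a minimal sketch of what an implementation could look like, the following subclasses
+``FedAvg`` and overrides ``evaluate``. The ``load_model_from_ndarrays`` and ``test``
+helpers, as well as ``test_loader``, are hypothetical stand-ins for your own
+model-loading and evaluation code:
+
+.. code-block:: python
+
+    import flwr as fl
+
+
+    class FedAvgWithServerSideEval(fl.server.strategy.FedAvg):
+        def evaluate(self, parameters):
+            # Deserialize the current global model parameters
+            ndarrays = fl.common.parameters_to_ndarrays(parameters)
+            model = load_model_from_ndarrays(ndarrays)  # hypothetical helper
+            loss, accuracy = test(model, test_loader)  # hypothetical helper
+            return loss, {"accuracy": accuracy}
diff --git a/doc/source/how-to-install-flower.rst b/doc/source/how-to-install-flower.rst
index a621377c8ce6..89cdf8b836cf 100644
--- a/doc/source/how-to-install-flower.rst
+++ b/doc/source/how-to-install-flower.rst
@@ -1,12 +1,11 @@ Install Flower
 ==============
 
-
 Python version
 --------------
 
-Flower requires at least `Python 3.9 `_, but `Python 3.10 `_ or above is recommended.
-
+Flower requires at least `Python 3.9 `_, but `Python 3.10
+`_ or above is recommended.
 
 Install stable release
 ----------------------
@@ -14,45 +13,56 @@ Install stable release
 Using pip
 ~~~~~~~~~
 
-Stable releases are available on `PyPI `_::
+Stable releases are available on `PyPI `_:
+
+::
 
-    python -m pip install flwr
+    python -m pip install flwr
 
-For simulations that use the Virtual Client Engine, ``flwr`` should be installed with the ``simulation`` extra::
+For simulations that use the Virtual Client Engine, ``flwr`` should be installed with
+the ``simulation`` extra:
 
-    python -m pip install "flwr[simulation]"
+::
 
+    python -m pip install "flwr[simulation]"
 
 Using conda (or mamba)
 ~~~~~~~~~~~~~~~~~~~~~~
 
 Flower can also be installed from the ``conda-forge`` channel.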
-If you have not added ``conda-forge`` to your channels, you will first need to run the following::
+If you have not added ``conda-forge`` to your channels, you will first need to run the
+following:
+
+::
+
+    conda config --add channels conda-forge
+    conda config --set channel_priority strict
 
-    conda config --add channels conda-forge
-    conda config --set channel_priority strict
+Once the ``conda-forge`` channel has been enabled, ``flwr`` can be installed with
+``conda``:
 
-Once the ``conda-forge`` channel has been enabled, ``flwr`` can be installed with ``conda``::
+::
 
-    conda install flwr
+    conda install flwr
 
-or with ``mamba``::
+or with ``mamba``:
 
-    mamba install flwr
+::
 
+    mamba install flwr
 
 Verify installation
 -------------------
 
-The following command can be used to verify if Flower was successfully installed. If everything worked, it should print the version of Flower to the command line:
+The following command can be used to verify that Flower was successfully installed. If
+everything worked, it should print the version of Flower to the command line:
 
 .. code-block:: bash
-    :substitutions:
-
-    python -c "import flwr;print(flwr.__version__)"
-    |stable_flwr_version|
+    :substitutions:
 
+    python -c "import flwr;print(flwr.__version__)"
+    |stable_flwr_version|
 
 Advanced installation options
 -----------------------------
@@ -65,21 +75,32 @@ Install via Docker
 
 Install pre-release
 ~~~~~~~~~~~~~~~~~~~
 
-New (possibly unstable) versions of Flower are sometimes available as pre-release versions (alpha, beta, release candidate) before the stable release happens::
+New (possibly unstable) versions of Flower are sometimes available as pre-release
+versions (alpha, beta, release candidate) before the stable release happens:
+
+::
+
+    python -m pip install -U --pre flwr
 
-    python -m pip install -U --pre flwr
+For simulations that use the Virtual Client Engine, ``flwr`` pre-releases should be
+installed with the ``simulation`` extra:
 
-For simulations that use the Virtual Client Engine, ``flwr`` pre-releases should be installed with the ``simulation`` extra::
+::
 
-    python -m pip install -U --pre 'flwr[simulation]'
+    python -m pip install -U --pre 'flwr[simulation]'
 
 Install nightly release
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-The latest (potentially unstable) changes in Flower are available as nightly releases::
+The latest (potentially unstable) changes in Flower are available as nightly releases:
+
+::
+
+    python -m pip install -U flwr-nightly
 
-    python -m pip install -U flwr-nightly
+For simulations that use the Virtual Client Engine, ``flwr-nightly`` should be installed
+with the ``simulation`` extra:
 
-For simulations that use the Virtual Client Engine, ``flwr-nightly`` should be installed with the ``simulation`` extra::
+::
 
-    python -m pip install -U flwr-nightly[simulation]
+    python -m pip install -U 'flwr-nightly[simulation]'
diff --git a/doc/source/how-to-monitor-simulation.rst b/doc/source/how-to-monitor-simulation.rst
index f6c26a701d94..f540e22a6a77 100644
--- a/doc/source/how-to-monitor-simulation.rst
+++ b/doc/source/how-to-monitor-simulation.rst
@@ -1,109 +1,120 @@ Monitor simulation
 ==================
 
-Flower allows you to monitor system resources while running your simulation. Moreover, the Flower simulation engine is powerful and enables you to decide how to allocate resources per client manner and constrain the total usage. Insights from resource consumption can help you make smarter decisions and speed up the execution time. 
-
-The specific instructions assume you are using macOS and have the `Homebrew `_ package manager installed.
+Flower allows you to monitor system resources while running your simulation. Moreover,
+the Flower simulation engine is powerful and enables you to decide how to allocate
+resources on a per-client basis and constrain the total usage. Insights from resource
+consumption can help you make smarter decisions and speed up the execution time.
 
+The specific instructions assume you are using macOS and have the `Homebrew
+`_ package manager installed.
 
 Downloads
 ---------
 
 .. code-block:: bash
 
-    brew install prometheus grafana
+    brew install prometheus grafana
 
-`Prometheus `_ is used for data collection, while `Grafana `_ will enable you to visualize the collected data. They are both well integrated with `Ray `_ which Flower uses under the hood.
+`Prometheus `_ is used for data collection, while `Grafana
+`_ will enable you to visualize the collected data. They are both
+well integrated with `Ray `_ which Flower uses under the hood.
 
-Overwrite the configuration files (depending on your device, it might be installed on a different path).
+Overwrite the configuration files (depending on your device, they might be installed at
+a different path).
 
 If you are on an M1 Mac, it should be:
 
 .. code-block:: bash
 
-    /opt/homebrew/etc/prometheus.yml
-    /opt/homebrew/etc/grafana/grafana.ini
+    /opt/homebrew/etc/prometheus.yml
+    /opt/homebrew/etc/grafana/grafana.ini
 
 On the previous generation Intel Mac devices, it should be:
 
 .. code-block:: bash
 
-    /usr/local/etc/prometheus.yml
-    /usr/local/etc/grafana/grafana.ini
+    /usr/local/etc/prometheus.yml
+    /usr/local/etc/grafana/grafana.ini
 
-Open the respective configuration files and change them. Depending on your device, use one of the two following commands:
+Open the respective configuration files and change them. Depending on your device, use
+one of the two following commands:
 
 .. code-block:: bash
 
-    # M1 macOS
-    open /opt/homebrew/etc/prometheus.yml
+    # M1 macOS
+    open /opt/homebrew/etc/prometheus.yml
 
-    # Intel macOS
-    open /usr/local/etc/prometheus.yml
+    # Intel macOS
+    open /usr/local/etc/prometheus.yml
 
-and then delete all the text in the file and paste a new Prometheus config you see below. You may adjust the time intervals to your requirements:
+and then delete all the text in the file and paste the new Prometheus config shown
+below. You may adjust the time intervals to your requirements:
 
 .. code-block:: bash
 
-    global:
-    scrape_interval: 1s
-    evaluation_interval: 1s
+    global:
+      scrape_interval: 1s
+      evaluation_interval: 1s
 
-    scrape_configs:
-    # Scrape from each ray node as defined in the service_discovery.json provided by ray.
-    - job_name: 'ray'
-      file_sd_configs:
-      - files:
-        - '/tmp/ray/prom_metrics_service_discovery.json'
+    scrape_configs:
+      # Scrape from each ray node as defined in the service_discovery.json provided by ray.
+      - job_name: 'ray'
+        file_sd_configs:
+          - files:
+              - '/tmp/ray/prom_metrics_service_discovery.json'
 
-Now after you have edited the Prometheus configuration, do the same with the Grafana configuration files. Open those using one of the following commands as before:
+Now after you have edited the Prometheus configuration, do the same with the Grafana
+configuration files. Open those using one of the following commands as before:
 
 ..
code-block:: bash
 
-    # M1 macOS
-    open /opt/homebrew/etc/grafana/grafana.ini
+    # M1 macOS
+    open /opt/homebrew/etc/grafana/grafana.ini
 
-    # Intel macOS
-    open /usr/local/etc/grafana/grafana.ini
+    # Intel macOS
+    open /usr/local/etc/grafana/grafana.ini
 
-Your terminal editor should open and allow you to apply the following configuration as before.
+Your terminal editor should open and allow you to apply the following configuration as
+before.
 
 .. code-block:: bash
 
-    [security]
-    allow_embedding = true
-
-    [auth.anonymous]
-    enabled = true
-    org_name = Main Org.
-    org_role = Viewer
+    [security]
+    allow_embedding = true
 
-    [paths]
-    provisioning = /tmp/ray/session_latest/metrics/grafana/provisioning
+    [auth.anonymous]
+    enabled = true
+    org_name = Main Org.
+    org_role = Viewer
 
-Congratulations, you just downloaded all the necessary software needed for metrics tracking. Now, let’s start it.
+    [paths]
+    provisioning = /tmp/ray/session_latest/metrics/grafana/provisioning
 
+Congratulations, you just installed all the necessary software needed for metrics
+tracking. Now, let’s start it.
 
 Tracking metrics
 ----------------
 
-Before running your Flower simulation, you have to start the monitoring tools you have just installed and configured.
+Before running your Flower simulation, you have to start the monitoring tools you have
+just installed and configured.
 
 .. code-block:: bash
 
-    brew services start prometheus
-    brew services start grafana
+    brew services start prometheus
+    brew services start grafana
 
 Please include the following argument in your Python code when starting a simulation.
 
 .. code-block:: python
 
-    fl.simulation.start_simulation(
-    # ...
-    # all the args you used before
-    # ...
-    ray_init_args = {"include_dashboard": True}
-    )
+    fl.simulation.start_simulation(
+        # ...
+        # all the args you used before
+        # ...
+        ray_init_args={"include_dashboard": True}
+    )
 
 Now, you are ready to start your workload.
 
@@ -111,126 +122,140 @@ Shortly after the simulation starts, you should see the following logs in your t
 
 .. code-block:: bash
 
-    2023-01-20 16:22:58,620 INFO [worker.py:1529](http://worker.py:1529/) -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8265
-
+    2023-01-20 16:22:58,620 INFO [worker.py:1529](http://worker.py:1529/) -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8265
 
-You can look at everything at ``_ .
+You can look at everything at http://127.0.0.1:8265.
 
-It's a Ray Dashboard. You can navigate to Metrics (on the left panel, the lowest option).
+It's a Ray Dashboard. You can navigate to Metrics (on the left panel, the lowest
+option).
 
-Or alternatively, you can just see them in Grafana by clicking on the right-up corner, “View in Grafana”. Please note that the Ray dashboard is only accessible during the simulation. After the simulation ends, you can only use Grafana to explore the metrics. You can start Grafana by going to ``http://localhost:3000/``.
+Or alternatively, you can just see them in Grafana by clicking “View in Grafana” in the
+top-right corner. Please note that the Ray dashboard is only accessible during the
+simulation. After the simulation ends, you can only use Grafana to explore the metrics.
+You can start Grafana by going to ``http://localhost:3000/``.
 
-After you finish the visualization, stop Prometheus and Grafana. This is important as they will otherwise block, for example port :code:`3000` on your machine as long as they are running.
+After you finish the visualization, stop Prometheus and Grafana. 
This is important, as
+they will otherwise keep blocking ports on your machine (for example, port ``3000``) for
+as long as they are running.
 
 .. code-block:: bash
 
-    brew services stop prometheus
-    brew services stop grafana
-
+    brew services stop prometheus
+    brew services stop grafana
 
 Resource allocation
 -------------------
 
-You must understand how the Ray library works to efficiently allocate system resources to simulation clients on your own.
+You must understand how the Ray library works to efficiently allocate system resources
+to simulation clients on your own.
 
-Initially, the simulation (which Ray handles under the hood) starts by default with all the available resources on the system, which it shares among the clients. It doesn't mean it divides it equally among all of them, nor that the model training happens at all of them simultaneously. You will learn more about that in the later part of this blog. You can check the system resources by running the following:
+Initially, the simulation (which Ray handles under the hood) starts by default with all
+the available resources on the system, which it shares among the clients. This doesn't
+mean it divides the resources equally among all of them, nor that model training happens
+on all of them simultaneously. You will learn more about that in the later part of this
+guide. You can check the system resources by running the following:
 
 .. code-block:: python
 
-    import ray
-    ray.available_resources()
+    import ray
+
+    ray.available_resources()
 
 In Google Colab, the result you see might be similar to this:
 
 .. code-block:: bash
 
-    {'memory': 8020104807.0,
-    'GPU': 1.0,
-    'object_store_memory': 4010052403.0,
-    'CPU': 2.0,
-    'accelerator_type:T4': 1.0,
-    'node:172.28.0.2': 1.0}
+    {'memory': 8020104807.0,
+     'GPU': 1.0,
+     'object_store_memory': 4010052403.0,
+     'CPU': 2.0,
+     'accelerator_type:T4': 1.0,
+     'node:172.28.0.2': 1.0}
 
-
-However, you can overwrite the defaults. When starting a simulation, do the following (you don't need to overwrite all of them):
+However, you can overwrite the defaults. When starting a simulation, do the following
+(you don't need to overwrite all of them):
 
 .. code-block:: python
 
-    num_cpus = 2
-    num_gpus = 1
-    ram_memory = 16_000 * 1024 * 1024 # 16 GB
-    fl.simulation.start_simulation(
-    # ...
-    # all the args you were specifying before
-    # ...
-    ray_init_args = {
-    "include_dashboard": True, # we need this one for tracking
-    "num_cpus": num_cpus,
-    "num_gpus": num_gpus,
-    "memory": ram_memory,
-    }
-    )
-
+    num_cpus = 2
+    num_gpus = 1
+    ram_memory = 16_000 * 1024 * 1024  # 16 GB
+    fl.simulation.start_simulation(
+        # ...
+        # all the args you were specifying before
+        # ...
+        ray_init_args={
+            "include_dashboard": True,  # we need this one for tracking
+            "num_cpus": num_cpus,
+            "num_gpus": num_gpus,
+            "memory": ram_memory,
+        }
+    )
 
 Let’s also specify the resource for a single client.
 
 .. code-block:: python
 
-    # Total resources for simulation
-    num_cpus = 4
-    num_gpus = 1
-    ram_memory = 16_000 * 1024 * 1024 # 16 GB
-
-    # Single client resources
-    client_num_cpus = 2
-    client_num_gpus = 1
-
-    fl.simulation.start_simulation(
-    # ...
-    # all the args you were specifying before
-    # ...
-    ray_init_args = {
-    "include_dashboard": True, # we need this one for tracking
-    "num_cpus": num_cpus,
-    "num_gpus": num_gpus,
-    "memory": ram_memory,
-    },
-    # The argument below is new
-    client_resources = {
-    "num_cpus": client_num_cpus,
-    "num_gpus": client_num_gpus,
-    }
-    )
-
-Now comes the crucial part. 
Ray will start a new client only when it has all the required resources (such that they run in parallel) when the resources allow.
-
-In the example above, only one client will be run, so your clients won't run concurrently. Setting :code:`client_num_gpus = 0.5` would allow running two clients and therefore enable them to run concurrently.
-Be careful not to require more resources than available. If you specified :code:`client_num_gpus = 2`, the simulation wouldn't start (even if you had 2 GPUs but decided to set 1 in :code:`ray_init_args`).
-
+    # Total resources for simulation
+    num_cpus = 4
+    num_gpus = 1
+    ram_memory = 16_000 * 1024 * 1024  # 16 GB
+
+    # Single client resources
+    client_num_cpus = 2
+    client_num_gpus = 1
+
+    fl.simulation.start_simulation(
+        # ...
+        # all the args you were specifying before
+        # ...
+        ray_init_args={
+            "include_dashboard": True,  # we need this one for tracking
+            "num_cpus": num_cpus,
+            "num_gpus": num_gpus,
+            "memory": ram_memory,
+        },
+        # The argument below is new
+        client_resources={
+            "num_cpus": client_num_cpus,
+            "num_gpus": client_num_gpus,
+        },
+    )
+
+Now comes the crucial part. Ray will start a new client only when all the resources it
+requires are available, so clients run in parallel only when the resources allow it.
+
+In the example above, only one client will be run, so your clients won't run
+concurrently. Setting ``client_num_gpus = 0.5`` would allow running two clients and
+therefore enable them to run concurrently. Be careful not to require more resources than
+available. If you specified ``client_num_gpus = 2``, the simulation wouldn't start (even
+if you had 2 GPUs but decided to set 1 in ``ray_init_args``).
 
 FAQ
 ---
 
 Q: I don't see any metrics logged.
 
-A: The timeframe might not be properly set. The setting is in the top right corner ("Last 30 minutes" by default). Please change the timeframe to reflect the period when the simulation was running.
+A: The timeframe might not be properly set. The setting is in the top right corner
+("Last 30 minutes" by default). Please change the timeframe to reflect the period when
+the simulation was running.
 
-Q: I see “Grafana server not detected. Please make sure the Grafana server is running and refresh this page” after going to the Metrics tab in Ray Dashboard.
+Q: I see “Grafana server not detected. Please make sure the Grafana server is running
+and refresh this page” after going to the Metrics tab in Ray Dashboard.
 
 A: You probably don't have Grafana running. Please check the running services
 
 .. code-block:: bash
 
-    brew services list
+    brew services list
 
-Q: I see "This site can't be reached" when going to ``_.
+Q: I see "This site can't be reached" when going to http://127.0.0.1:8265.
 
 A: Either the simulation has already finished, or you still need to start Prometheus.
 
-
 Resources
 ---------
 
-Ray Dashboard: ``_
+Ray Dashboard: https://docs.ray.io/en/latest/ray-observability/getting-started.html
 
-Ray Metrics: ``_
+Ray Metrics: https://docs.ray.io/en/latest/cluster/metrics.html
diff --git a/doc/source/how-to-run-simulations.rst b/doc/source/how-to-run-simulations.rst
index d1dcb511ed51..fb4eed17b4e7 100644
--- a/doc/source/how-to-run-simulations.rst
+++ b/doc/source/how-to-run-simulations.rst
@@ -1,48 +1,85 @@ Run simulations
 ===============
 
-.. 
youtube:: cRebUIGB5RU
-   :url_parameters: ?list=PLNG4feLHqCWlnj8a_E1A_n5zr2-8pafTB
-   :width: 100%
-
-Simulating Federated Learning workloads is useful for a multitude of use-cases: you might want to run your workload on a large cohort of clients but without having to source, configure and mange a large number of physical devices; you might want to run your FL workloads as fast as possible on the compute systems you have access to without having to go through a complex setup process; you might want to validate your algorithm on different scenarios at varying levels of data and system heterogeneity, client availability, privacy budgets, etc. These are among some of the use-cases where simulating FL workloads makes sense. Flower can accommodate these scenarios by means of its `VirtualClientEngine `_ or VCE.
-
-The :code:`VirtualClientEngine` schedules, launches and manages `virtual` clients. These clients are identical to `non-virtual` clients (i.e. the ones you launch via the command `flwr.client.start_client `_) in the sense that they can be configure by creating a class inheriting, for example, from `flwr.client.NumPyClient `_ and therefore behave in an identical way. In addition to that, clients managed by the :code:`VirtualClientEngine` are:
-
-* resource-aware: this means that each client gets assigned a portion of the compute and memory on your system. You as a user can control this at the beginning of the simulation and allows you to control the degree of parallelism of your Flower FL simulation. The fewer the resources per client, the more clients can run concurrently on the same hardware.
-* self-managed: this means that you as a user do not need to launch clients manually, instead this gets delegated to :code:`VirtualClientEngine`'s internals.
-* ephemeral: this means that a client is only materialized when it is required in the FL process (e.g. to do `fit() `_). The object is destroyed afterwards, releasing the resources it was assigned and allowing in this way other clients to participate.
-
-The :code:`VirtualClientEngine` implements `virtual` clients using `Ray `_, an open-source framework for scalable Python workloads. In particular, Flower's :code:`VirtualClientEngine` makes use of `Actors `_ to spawn `virtual` clients and run their workload.
-
+.. youtube:: cRebUIGB5RU
+    :url_parameters: ?list=PLNG4feLHqCWlnj8a_E1A_n5zr2-8pafTB
+    :width: 100%
+
+Simulating Federated Learning workloads is useful for a multitude of use-cases: you
+might want to run your workload on a large cohort of clients but without having to
+source, configure and manage a large number of physical devices; you might want to run
+your FL workloads as fast as possible on the compute systems you have access to without
+having to go through a complex setup process; you might want to validate your algorithm
+on different scenarios at varying levels of data and system heterogeneity, client
+availability, privacy budgets, etc. These are among some of the use-cases where
+simulating FL workloads makes sense. Flower can accommodate these scenarios by means of
+its `VirtualClientEngine
+`_ or VCE.
+
+The ``VirtualClientEngine`` schedules, launches and manages `virtual` clients. These
+clients are identical to `non-virtual` clients (i.e. the ones you launch via the command
+`flwr.client.start_client `_) in the sense that they can
+be configured by creating a class inheriting, for example, from `flwr.client.NumPyClient
+`_ and therefore behave in an identical way. 
+In addition to that, clients managed by the ``VirtualClientEngine`` are:
+
+- resource-aware: this means that each client gets assigned a portion of the compute and
+  memory on your system. You as a user can control this at the beginning of the
+  simulation, which allows you to control the degree of parallelism of your Flower FL
+  simulation. The fewer the resources per client, the more clients can run concurrently
+  on the same hardware.
+- self-managed: this means that you as a user do not need to launch clients manually,
+  instead this gets delegated to ``VirtualClientEngine``'s internals.
+- ephemeral: this means that a client is only materialized when it is required in the FL
+  process (e.g. to do `fit() `_). The object
+  is destroyed afterwards, releasing the resources it was assigned and allowing in this
+  way other clients to participate.
+
+The ``VirtualClientEngine`` implements `virtual` clients using `Ray
+`_, an open-source framework for scalable Python workloads. In
+particular, Flower's ``VirtualClientEngine`` makes use of `Actors
+`_ to spawn `virtual` clients and
+run their workload.
 
 Launch your Flower simulation
 -----------------------------
 
-Running Flower simulations still require you to define your client class, a strategy, and utility functions to download and load (and potentially partition) your dataset. With that out of the way, launching your simulation is done with `start_simulation `_ and a minimal example looks as follows:
-
+Running Flower simulations still requires you to define your client class, a strategy,
+and utility functions to download and load (and potentially partition) your dataset.
+With that out of the way, launching your simulation is done with `start_simulation
+`_ and a minimal example looks as
+follows:
 
 .. code-block:: python
 
     import flwr as fl
     from flwr.server.strategy import FedAvg
-
+
+
     def client_fn(cid: str):
         # Return a standard Flower client
         return MyFlowerClient().to_client()
 
+
     # Launch the simulation
     hist = fl.simulation.start_simulation(
-        client_fn=client_fn, # A function to run a _virtual_ client when required
-        num_clients=50, # Total number of clients available
-        config=fl.server.ServerConfig(num_rounds=3), # Specify number of FL rounds
-        strategy=FedAvg() # A Flower strategy
+        client_fn=client_fn,  # A function to run a _virtual_ client when required
+        num_clients=50,  # Total number of clients available
+        config=fl.server.ServerConfig(num_rounds=3),  # Specify number of FL rounds
+        strategy=FedAvg(),  # A Flower strategy
     )
 
-
 VirtualClientEngine resources
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-By default the VCE has access to all system resources (i.e. all CPUs, all GPUs, etc) since that is also the default behavior when starting Ray. However, in some settings you might want to limit how many of your system resources are used for simulation. You can do this via the :code:`ray_init_args` input argument to :code:`start_simulation` which the VCE internally passes to Ray's :code:`ray.init` command. For a complete list of settings you can configure check the `ray.init `_ documentation. Do not set :code:`ray_init_args` if you want the VCE to use all your system's CPUs and GPUs.
+
+By default the VCE has access to all system resources (i.e. all CPUs, all GPUs, etc)
+since that is also the default behavior when starting Ray. However, in some settings you
+might want to limit how many of your system resources are used for simulation. 
You can
+do this via the ``ray_init_args`` input argument to ``start_simulation`` which the VCE
+internally passes to Ray's ``ray.init`` command. For a complete list of settings you
+can configure, check the `ray.init
+`_ documentation.
+Do not set ``ray_init_args`` if you want the VCE to use all your system's CPUs and GPUs.
 
 .. code-block:: python
 
@@ -50,22 +87,28 @@ By default the VCE has access to all system resources (i.e. all CPUs, all GPUs,
 
     # Launch the simulation by limiting resources visible to Flower's VCE
     hist = fl.simulation.start_simulation(
-        ...
+        # ...
         # Out of all CPUs and GPUs available in your system,
         # only 8xCPUs and 1xGPUs would be used for simulation.
-        ray_init_args = {'num_cpus': 8, 'num_gpus': 1}
+        ray_init_args={"num_cpus": 8, "num_gpus": 1}
     )
 
-
-
 Assigning client resources
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
-By default the :code:`VirtualClientEngine` assigns a single CPU core (and nothing else) to each virtual client. This means that if your system has 10 cores, that many virtual clients can be concurrently running.
-More often than not, you would probably like to adjust the resources your clients get assigned based on the complexity (i.e. compute and memory footprint) of your FL workload. You can do so when starting your simulation by setting the argument `client_resources` to `start_simulation `_. Two keys are internally used by Ray to schedule and spawn workloads (in our case Flower clients):
+
+By default the ``VirtualClientEngine`` assigns a single CPU core (and nothing else) to
+each virtual client. This means that if your system has 10 cores, that many virtual
+clients can be concurrently running.
 
-* :code:`num_cpus` indicates the number of CPU cores a client would get.
-* :code:`num_gpus` indicates the **ratio** of GPU memory a client gets assigned.
+More often than not, you would probably like to adjust the resources your clients get
+assigned based on the complexity (i.e. compute and memory footprint) of your FL
+workload. You can do so when starting your simulation by setting the argument
+`client_resources` to `start_simulation
+`_. Two keys are internally used by
+Ray to schedule and spawn workloads (in our case Flower clients):
+
+- ``num_cpus`` indicates the number of CPU cores a client would get.
+- ``num_gpus`` indicates the **ratio** of GPU memory a client gets assigned.
 
 Let's see a few examples:
 
@@ -74,90 +117,140 @@ Let's see a few examples:
 
     import flwr as fl
 
     # each client gets 1xCPU (this is the default if no resources are specified)
-    my_client_resources = {'num_cpus': 1, 'num_gpus': 0.0}
+    my_client_resources = {"num_cpus": 1, "num_gpus": 0.0}
     # each client gets 2xCPUs and half a GPU. (with a single GPU, 2 clients run concurrently)
-    my_client_resources = {'num_cpus': 2, 'num_gpus': 0.5}
+    my_client_resources = {"num_cpus": 2, "num_gpus": 0.5}
     # 10 client can run concurrently on a single GPU, but only if you have 20 CPU threads.
-    my_client_resources = {'num_cpus': 2, 'num_gpus': 0.1}
+    my_client_resources = {"num_cpus": 2, "num_gpus": 0.1}
 
     # Launch the simulation
     hist = fl.simulation.start_simulation(
-        ...
-        client_resources = my_client_resources # A Python dict specifying CPU/GPU resources
+        # ...
+        client_resources=my_client_resources  # A Python dict specifying CPU/GPU resources
    )
 
-While the :code:`client_resources` can be used to control the degree of concurrency in your FL simulation, this does not stop you from running dozens, hundreds or even thousands of clients in the same round and having orders of magnitude more `dormant` (i.e. 
not participating in a round) clients. Let's say you want to have 100 clients per round but your system can only accommodate 8 clients concurrently. The :code:`VirtualClientEngine` will schedule 100 jobs to run (each simulating a client sampled by the strategy) and then will execute them in a resource-aware manner in batches of 8. +While the ``client_resources`` can be used to control the degree of concurrency in your +FL simulation, this does not stop you from running dozens, hundreds or even thousands of +clients in the same round and having orders of magnitude more `dormant` (i.e. not +participating in a round) clients. Let's say you want to have 100 clients per round but +your system can only accommodate 8 clients concurrently. The ``VirtualClientEngine`` +will schedule 100 jobs to run (each simulating a client sampled by the strategy) and +then will execute them in a resource-aware manner in batches of 8. -To understand all the intricate details on how resources are used to schedule FL clients and how to define custom resources, please take a look at the `Ray documentation `_. +To understand all the intricate details on how resources are used to schedule FL clients +and how to define custom resources, please take a look at the `Ray documentation +`_. Simulation examples ~~~~~~~~~~~~~~~~~~~ -A few ready-to-run complete examples for Flower simulation in Tensorflow/Keras and PyTorch are provided in the `Flower repository `_. You can run them on Google Colab too: - -* `Tensorflow/Keras Simulation `_: 100 clients collaboratively train a MLP model on MNIST. -* `PyTorch Simulation `_: 100 clients collaboratively train a CNN model on MNIST. - +A few ready-to-run complete examples for Flower simulation in Tensorflow/Keras and +PyTorch are provided in the `Flower repository `_. You +can run them on Google Colab too: +- `Tensorflow/Keras Simulation + `_: 100 + clients collaboratively train a MLP model on MNIST. +- `PyTorch Simulation + `_: 100 clients + collaboratively train a CNN model on MNIST. Multi-node Flower simulations ----------------------------- -Flower's :code:`VirtualClientEngine` allows you to run FL simulations across multiple compute nodes. Before starting your multi-node simulation ensure that you: - -#. Have the same Python environment in all nodes. -#. Have a copy of your code (e.g. your entire repo) in all nodes. -#. Have a copy of your dataset in all nodes (more about this in :ref:`simulation considerations `) -#. Pass :code:`ray_init_args={"address"="auto"}` to `start_simulation `_ so the :code:`VirtualClientEngine` attaches to a running Ray instance. -#. Start Ray on you head node: on the terminal type :code:`ray start --head`. This command will print a few lines, one of which indicates how to attach other nodes to the head node. -#. Attach other nodes to the head node: copy the command shown after starting the head and execute it on terminal of a new node: for example :code:`ray start --address='192.168.1.132:6379'` - -With all the above done, you can run your code from the head node as you would if the simulation was running on a single node. - -Once your simulation is finished, if you'd like to dismantle your cluster you simply need to run the command :code:`ray stop` in each node's terminal (including the head node). +Flower's ``VirtualClientEngine`` allows you to run FL simulations across multiple +compute nodes. Before starting your multi-node simulation ensure that you: + +1. Have the same Python environment in all nodes. +2. Have a copy of your code (e.g. 
your entire repo) in all nodes.
+3. Have a copy of your dataset in all nodes (more about this in :ref:`simulation
+   considerations `)
+4. Pass ``ray_init_args={"address"="auto"}`` to `start_simulation
+   `_ so the ``VirtualClientEngine``
+   attaches to a running Ray instance.
+5. Start Ray on your head node: on the terminal type ``ray start --head``. This command
+   will print a few lines, one of which indicates how to attach other nodes to the head
+   node.
+6. Attach other nodes to the head node: copy the command shown after starting the head
+   and execute it on the terminal of a new node: for example ``ray start
+   --address='192.168.1.132:6379'``
+
+With all the above done, you can run your code from the head node as you would if the
+simulation was running on a single node.
+
+Once your simulation is finished, if you'd like to dismantle your cluster you simply
+need to run the command ``ray stop`` in each node's terminal (including the head node).
 
 Multi-node simulation good-to-know
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Here we list a few interesting functionality when running multi-node FL simulations:
 
-User :code:`ray status` to check all nodes connected to your head node as well as the total resources available to the :code:`VirtualClientEngine`.
+Use ``ray status`` to check all nodes connected to your head node as well as the total
+resources available to the ``VirtualClientEngine``.
 
-When attaching a new node to the head, all its resources (i.e. all CPUs, all GPUs) will be visible by the head node. This means that the :code:`VirtualClientEngine` can schedule as many `virtual` clients as that node can possible run. In some settings you might want to exclude certain resources from the simulation. You can do this by appending `--num-cpus=` and/or `--num-gpus=` in any :code:`ray start` command (including when starting the head)
+When attaching a new node to the head, all its resources (i.e. all CPUs, all GPUs) will
+be visible to the head node. This means that the ``VirtualClientEngine`` can schedule as
+many `virtual` clients as that node can possibly run. In some settings you might want to
+exclude certain resources from the simulation. You can do this by appending
+`--num-cpus=` and/or `--num-gpus=` in any ``ray
+start`` command (including when starting the head).
 
 .. _considerations-for-simulations:
 
-
 Considerations for simulations
 ------------------------------
 
 .. note::
 
-    We are actively working on these fronts so to make it trivial to run any FL workload with Flower simulation.
+    We are actively working on these fronts to make it trivial to run any FL workload
+    with Flower simulation.
 
-The current VCE allows you to run Federated Learning workloads in simulation mode whether you are prototyping simple scenarios on your personal laptop or you want to train a complex FL pipeline across multiple high-performance GPU nodes. While we add more capabilities to the VCE, the points below highlight some of the considerations to keep in mind when designing your FL pipeline with Flower. We also highlight a couple of current limitations in our implementation.
+The current VCE allows you to run Federated Learning workloads in simulation mode
+whether you are prototyping simple scenarios on your personal laptop or you want to
+train a complex FL pipeline across multiple high-performance GPU nodes. While we add
+more capabilities to the VCE, the points below highlight some of the considerations to
+keep in mind when designing your FL pipeline with Flower. 
We also highlight a couple of
+current limitations in our implementation.
 
 GPU resources
 ~~~~~~~~~~~~~
 
-The VCE assigns a share of GPU memory to a client that specifies the key :code:`num_gpus` in :code:`client_resources`. This being said, Ray (used internally by the VCE) is by default:
-
-
-* not aware of the total VRAM available on the GPUs. This means that if you set :code:`num_gpus=0.5` and you have two GPUs in your system with different (e.g. 32GB and 8GB) VRAM amounts, they both would run 2 clients concurrently.
-* not aware of other unrelated (i.e. not created by the VCE) workloads are running on the GPU. Two takeaways from this are:
+The VCE assigns a share of GPU memory to a client that specifies the key ``num_gpus`` in
+``client_resources``. This being said, Ray (used internally by the VCE) is by default:
 
-    * Your Flower server might need a GPU to evaluate the `global model` after aggregation (by instance when making use of the `evaluate method `_)
-    * If you want to run several independent Flower simulations on the same machine you need to mask-out your GPUs with :code:`CUDA_VISIBLE_DEVICES=""` when launching your experiment.
+- not aware of the total VRAM available on the GPUs. This means that if you set
+  ``num_gpus=0.5`` and you have two GPUs in your system with different (e.g. 32GB and
+  8GB) VRAM amounts, they both would run 2 clients concurrently.
+- not aware of other unrelated (i.e. not created by the VCE) workloads running on the
+  GPU. Two takeaways from this are:
 
+  - Your Flower server might need a GPU to evaluate the `global model` after aggregation
+    (for instance when making use of the `evaluate method
+    `_)
+  - If you want to run several independent Flower simulations on the same machine you
+    need to mask-out your GPUs with ``CUDA_VISIBLE_DEVICES=""`` when launching
+    your experiment.
 
-In addition, the GPU resource limits passed to :code:`client_resources` are not `enforced` (i.e. they can be exceeded) which can result in the situation of client using more VRAM than the ratio specified when starting the simulation.
+In addition, the GPU resource limits passed to ``client_resources`` are not `enforced`
+(i.e. they can be exceeded) which can result in a client using more VRAM than the ratio
+specified when starting the simulation.
 
 TensorFlow with GPUs
-""""""""""""""""""""
+++++++++++++++++++++
 
-When `using a GPU with TensorFlow `_ nearly your entire GPU memory of all your GPUs visible to the process will be mapped. This is done by TensorFlow for optimization purposes. However, in settings such as FL simulations where we want to split the GPU into multiple `virtual` clients, this is not a desirable mechanism. Luckily we can disable this default behavior by `enabling memory growth `_.
+When `using a GPU with TensorFlow `_ nearly the
+entire memory of all GPUs visible to the process will be mapped. This is done by
+TensorFlow for optimization purposes. However, in settings such as FL simulations where
+we want to split the GPU into multiple `virtual` clients, this is not a desirable
+mechanism. Luckily we can disable this default behavior by `enabling memory growth
+`_.
 
-This would need to be done in the main process (which is where the server would run) and in each Actor created by the VCE. By means of :code:`actor_kwargs` we can pass the reserved key `"on_actor_init_fn"` in order to specify a function to be executed upon actor initialization. In this case, to enable GPU growth for TF workloads. 
It would look as follows:
+This would need to be done in the main process (which is where the server would run) and
+in each Actor created by the VCE. By means of ``actor_kwargs`` we can pass the reserved
+key `"on_actor_init_fn"` in order to specify a function to be executed upon actor
+initialization. In this case, to enable GPU growth for TF workloads. It would look as
+follows:
 
 .. code-block:: python
 
@@ -170,19 +263,29 @@ This would need to be done in the main process (which is where the server would
 
     # Start Flower simulation
     hist = fl.simulation.start_simulation(
-        ...
+        # ...
        actor_kwargs={
-            "on_actor_init_fn": enable_tf_gpu_growth # <-- To be executed upon actor init.
+            "on_actor_init_fn": enable_tf_gpu_growth  # <-- To be executed upon actor init.
        },
    )
 
-This is precisely the mechanism used in `Tensorflow/Keras Simulation `_ example.
-
+This is precisely the mechanism used in the `Tensorflow/Keras Simulation
+`_ example.
 
 Multi-node setups
 ~~~~~~~~~~~~~~~~~
 
-* The VCE does not currently offer a way to control on which node a particular `virtual` client is executed. In other words, if more than a single node have the resources needed by a client to run, then any of those nodes could get the client workload scheduled onto. Later in the FL process (i.e. in a different round) the same client could be executed by a different node. Depending on how your clients access their datasets, this might require either having a copy of all dataset partitions on all nodes or a dataset serving mechanism (e.g. using nfs, a database) to circumvent data duplication.
-
-* By definition virtual clients are `stateless` due to their ephemeral nature. A client state can be implemented as part of the Flower client class but users need to ensure this saved to persistent storage (e.g. a database, disk) and that can be retrieve later by the same client regardless on which node it is running from. This is related to the point above also since, in some way, the client's dataset could be seen as a type of `state`.
-
+- The VCE does not currently offer a way to control on which node a particular `virtual`
+  client is executed. In other words, if more than a single node has the resources
+  needed by a client to run, then any of those nodes could have the client workload
+  scheduled onto it. Later in the FL process (i.e. in a different round) the same client
+  could be executed by a different node. Depending on how your clients access their
+  datasets, this might require either having a copy of all dataset partitions on all
+  nodes or a dataset serving mechanism (e.g. using nfs, a database) to circumvent data
+  duplication.
+- By definition virtual clients are `stateless` due to their ephemeral nature. A client
+  state can be implemented as part of the Flower client class but users need to ensure
+  this is saved to persistent storage (e.g. a database, disk) and that it can be
+  retrieved later by the same client regardless of which node it is running on. This is
+  related to the point above also since, in some way, the client's dataset could be seen
+  as a type of `state`. A minimal sketch of this idea follows below.
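+
+As a minimal sketch of persisting client state, the example below keys saved state by
+the client's ``cid`` and writes it to a shared filesystem. Everything here
+(``STATE_DIR``, the pickle format, the ``rounds`` counter) is hypothetical and only
+illustrates the idea; it is not a Flower API:
+
+.. code-block:: python
+
+    import pickle
+    from pathlib import Path
+
+    import flwr as fl
+
+    STATE_DIR = Path("/mnt/shared/client_state")  # hypothetical shared location
+
+
+    class StatefulClient(fl.client.NumPyClient):
+        def __init__(self, cid: str):
+            self.cid = cid
+            path = STATE_DIR / f"{cid}.pkl"
+            # Restore previously saved state, regardless of the node we run on
+            if path.exists():
+                self.state = pickle.loads(path.read_bytes())
+            else:
+                self.state = {"rounds": 0}
+
+        def fit(self, parameters, config):
+            self.state["rounds"] += 1
+            # ... regular local training would happen here ...
+            # Persist state before this ephemeral client object is destroyed
+            STATE_DIR.mkdir(parents=True, exist_ok=True)
+            (STATE_DIR / f"{self.cid}.pkl").write_bytes(pickle.dumps(self.state))
+            return parameters, 1, {"rounds": self.state["rounds"]}
diff --git a/doc/source/how-to-save-and-load-model-checkpoints.rst b/doc/source/how-to-save-and-load-model-checkpoints.rst
index 0d711e375cd8..f2f12dae97be 100644
--- a/doc/source/how-to-save-and-load-model-checkpoints.rst
+++ b/doc/source/how-to-save-and-load-model-checkpoints.rst
@@ -1,17 +1,19 @@ Save and load model checkpoints
 ===============================
 
-Flower does not automatically save model updates on the server-side. 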
This how-to guide describes the steps to save (and load) model checkpoints in Flower.
-
+Flower does not automatically save model updates on the server-side. This how-to guide
+describes the steps to save (and load) model checkpoints in Flower.
 
 Model checkpointing
 -------------------
 
-Model updates can be persisted on the server-side by customizing :code:`Strategy` methods.
-Implementing custom strategies is always an option, but for many cases it may be more convenient to simply customize an existing strategy.
-The following code example defines a new :code:`SaveModelStrategy` which customized the existing built-in :code:`FedAvg` strategy.
-In particular, it customizes :code:`aggregate_fit` by calling :code:`aggregate_fit` in the base class (:code:`FedAvg`).
-It then continues to save returned (aggregated) weights before it returns those aggregated weights to the caller (i.e., the server):
+Model updates can be persisted on the server-side by customizing ``Strategy`` methods.
+Implementing custom strategies is always an option, but for many cases it may be more
+convenient to simply customize an existing strategy. The following code example defines
+a new ``SaveModelStrategy`` which customizes the existing built-in ``FedAvg`` strategy.
+In particular, it customizes ``aggregate_fit`` by calling ``aggregate_fit`` in the base
+class (``FedAvg``). It then continues to save returned (aggregated) weights before it
+returns those aggregated weights to the caller (i.e., the server):
 
 .. code-block:: python
 
@@ -24,11 +26,15 @@ It then continues to save returned (aggregated) weights before it returns those
         ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
 
             # Call aggregate_fit from base class (FedAvg) to aggregate parameters and metrics
-            aggregated_parameters, aggregated_metrics = super().aggregate_fit(server_round, results, failures)
-
+            aggregated_parameters, aggregated_metrics = super().aggregate_fit(
+                server_round, results, failures
+            )
+
             if aggregated_parameters is not None:
                 # Convert `Parameters` to `List[np.ndarray]`
-                aggregated_ndarrays: List[np.ndarray] = fl.common.parameters_to_ndarrays(aggregated_parameters)
+                aggregated_ndarrays: List[np.ndarray] = fl.common.parameters_to_ndarrays(
+                    aggregated_parameters
+                )
 
                 # Save aggregated_ndarrays
                 print(f"Saving round {server_round} aggregated_ndarrays...")
@@ -36,24 +42,27 @@ It then continues to save returned (aggregated) weights before it returns those
 
             return aggregated_parameters, aggregated_metrics
 
+
     # Create strategy and run server
     strategy = SaveModelStrategy(
         # (same arguments as FedAvg here)
     )
     fl.server.start_server(strategy=strategy)
 
-
 Save and load PyTorch checkpoints
 ---------------------------------
 
-Similar to the previous example but with a few extra steps, we'll show how to
-store a PyTorch checkpoint we'll use the ``torch.save`` function.
-Firstly, ``aggregate_fit`` returns a ``Parameters`` object that has to be transformed into a list of NumPy ``ndarray``'s,
-then those are transformed into the PyTorch ``state_dict`` following the ``OrderedDict`` class structure.
+Similar to the previous example but with a few extra steps, we'll show how to store a
+PyTorch checkpoint using the ``torch.save`` function. Firstly, ``aggregate_fit`` returns
+a ``Parameters`` object that has to be transformed into a list of NumPy ``ndarray``'s,
+then those are transformed into the PyTorch ``state_dict`` following the ``OrderedDict``
+class structure.
 
 ..
code-block:: python net = cifar.Net().to(DEVICE) + + class SaveModelStrategy(fl.server.strategy.FedAvg): def aggregate_fit( self, @@ -64,14 +73,18 @@ then those are transformed into the PyTorch ``state_dict`` following the ``Order """Aggregate model weights using weighted average and store checkpoint""" # Call aggregate_fit from base class (FedAvg) to aggregate parameters and metrics - aggregated_parameters, aggregated_metrics = super().aggregate_fit(server_round, results, failures) - + aggregated_parameters, aggregated_metrics = super().aggregate_fit( + server_round, results, failures + ) + if aggregated_parameters is not None: print(f"Saving round {server_round} aggregated_parameters...") # Convert `Parameters` to `List[np.ndarray]` - aggregated_ndarrays: List[np.ndarray] = fl.common.parameters_to_ndarrays(aggregated_parameters) - + aggregated_ndarrays: List[np.ndarray] = fl.common.parameters_to_ndarrays( + aggregated_parameters + ) + # Convert `List[np.ndarray]` to PyTorch`state_dict` params_dict = zip(net.state_dict().keys(), aggregated_ndarrays) state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict}) @@ -82,7 +95,8 @@ then those are transformed into the PyTorch ``state_dict`` following the ``Order return aggregated_parameters, aggregated_metrics -To load your progress, you simply append the following lines to your code. Note that this will iterate over all saved checkpoints and load the latest one: +To load your progress, you simply append the following lines to your code. Note that +this will iterate over all saved checkpoints and load the latest one: .. code-block:: python @@ -94,4 +108,5 @@ To load your progress, you simply append the following lines to your code. Note state_dict_ndarrays = [v.cpu().numpy() for v in net.state_dict().values()] parameters = fl.common.ndarrays_to_parameters(state_dict_ndarrays) -Return/use this object of type ``Parameters`` wherever necessary, such as in the ``initial_parameters`` when defining a ``Strategy``. \ No newline at end of file +Return/use this object of type ``Parameters`` wherever necessary, such as in the +``initial_parameters`` when defining a ``Strategy``. diff --git a/doc/source/how-to-upgrade-to-flower-1.0.rst b/doc/source/how-to-upgrade-to-flower-1.0.rst index c0721b0f3736..5f10f16a551f 100644 --- a/doc/source/how-to-upgrade-to-flower-1.0.rst +++ b/doc/source/how-to-upgrade-to-flower-1.0.rst @@ -1,8 +1,10 @@ Upgrade to Flower 1.0 ===================== -Flower 1.0 is here. Along with new features, Flower 1.0 provides a stable foundation for future growth. Compared to Flower 0.19 (and other 0.x series releases), there are a few breaking changes that make it necessary to change the code of existing 0.x-series projects. - +Flower 1.0 is here. Along with new features, Flower 1.0 provides a stable foundation for +future growth. Compared to Flower 0.19 (and other 0.x series releases), there are a few +breaking changes that make it necessary to change the code of existing 0.x-series +projects. Install update -------------- @@ -14,11 +16,13 @@ Here's how to update an existing installation to Flower 1.0 using either pip or - ``python -m pip install -U flwr`` (when using ``start_server`` and ``start_client``) - ``python -m pip install -U 'flwr[simulation]'`` (when using ``start_simulation``) -- Poetry: update the ``flwr`` dependency in ``pyproject.toml`` and then reinstall (don't forget to delete ``poetry.lock`` via ``rm poetry.lock`` before running ``poetry install``). 
+- Poetry: update the ``flwr`` dependency in ``pyproject.toml`` and then reinstall (don't
+  forget to delete ``poetry.lock`` via ``rm poetry.lock`` before running ``poetry
+  install``).

  - ``flwr = "^1.0.0"`` (when using ``start_server`` and ``start_client``)
-  - ``flwr = { version = "^1.0.0", extras = ["simulation"] }`` (when using ``start_simulation``)
-
+  - ``flwr = { version = "^1.0.0", extras = ["simulation"] }`` (when using
+    ``start_simulation``)

Required changes
----------------
@@ -28,64 +32,96 @@ The following breaking changes require manual updates.
General
~~~~~~~

-Pass all arguments as keyword arguments (not as positional arguments). Here's an example:
+Pass all arguments as keyword arguments (not as positional arguments). Here's an
+example:

- Flower 0.19 (positional arguments): ``start_client("127.0.0.1:8080", FlowerClient())``
-- Flower 1.0 (keyword arguments): ``start_client(server_address="127.0.0.1:8080", client=FlowerClient())``
+- Flower 1.0 (keyword arguments): ``start_client(server_address="127.0.0.1:8080",
+  client=FlowerClient())``

Client
~~~~~~

-- Subclasses of ``NumPyClient``: change ``def get_parameters(self):``` to ``def get_parameters(self, config):``
-- Subclasses of ``Client``: change ``def get_parameters(self):``` to ``def get_parameters(self, ins: GetParametersIns):``
+- Subclasses of ``NumPyClient``: change ``def get_parameters(self):`` to ``def
+  get_parameters(self, config):``
+- Subclasses of ``Client``: change ``def get_parameters(self):`` to ``def
+  get_parameters(self, ins: GetParametersIns):``

Strategies / ``start_server`` / ``start_simulation``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-- Pass ``ServerConfig`` (instead of a dictionary) to ``start_server`` and ``start_simulation``. Here's an example:
+- Pass ``ServerConfig`` (instead of a dictionary) to ``start_server`` and
+  ``start_simulation``. Here's an example:

-  - Flower 0.19: ``start_server(..., config={"num_rounds": 3, "round_timeout": 600.0}, ...)``
-  - Flower 1.0: ``start_server(..., config=flwr.server.ServerConfig(num_rounds=3, round_timeout=600.0), ...)``
+  - Flower 0.19: ``start_server(..., config={"num_rounds": 3, "round_timeout": 600.0},
+    ...)``
+  - Flower 1.0: ``start_server(..., config=flwr.server.ServerConfig(num_rounds=3,
+    round_timeout=600.0), ...)``

-- Replace ``num_rounds=1`` in ``start_simulation`` with the new ``config=ServerConfig(...)`` (see previous item)
-- Remove ``force_final_distributed_eval`` parameter from calls to ``start_server``. Distributed evaluation on all clients can be enabled by configuring the strategy to sample all clients for evaluation after the last round of training.
+- Replace ``num_rounds=1`` in ``start_simulation`` with the new
+  ``config=ServerConfig(...)`` (see previous item)
+- Remove ``force_final_distributed_eval`` parameter from calls to ``start_server``.
+  Distributed evaluation on all clients can be enabled by configuring the strategy to
+  sample all clients for evaluation after the last round of training.
- Rename parameter/ndarray conversion functions:

  - ``parameters_to_weights`` --> ``parameters_to_ndarrays``
  - ``weights_to_parameters`` --> ``ndarrays_to_parameters``

-- Strategy initialization: if the strategy relies on the default values for ``fraction_fit`` and ``fraction_evaluate``, set ``fraction_fit`` and ``fraction_evaluate`` manually to ``0.1``. Projects that do not manually create a strategy (by calling ``start_server`` or ``start_simulation`` without passing a strategy instance) should now manually initialize FedAvg with ``fraction_fit`` and ``fraction_evaluate`` set to ``0.1``.
+- Strategy initialization: if the strategy relies on the default values for
+  ``fraction_fit`` and ``fraction_evaluate``, set ``fraction_fit`` and
+  ``fraction_evaluate`` manually to ``0.1``. Projects that do not manually create a
+  strategy (by calling ``start_server`` or ``start_simulation`` without passing a
+  strategy instance) should now manually initialize FedAvg with ``fraction_fit`` and
+  ``fraction_evaluate`` set to ``0.1`` (see the sketch after this list).
- Rename built-in strategy parameters (e.g., ``FedAvg``):

  - ``fraction_eval`` --> ``fraction_evaluate``
  - ``min_eval_clients`` --> ``min_evaluate_clients``
  - ``eval_fn`` --> ``evaluate_fn``

-- Rename ``rnd`` to ``server_round``. This impacts multiple methods and functions, for example, ``configure_fit``, ``aggregate_fit``, ``configure_evaluate``, ``aggregate_evaluate``, and ``evaluate_fn``.
+- Rename ``rnd`` to ``server_round``. This impacts multiple methods and functions, for
+  example, ``configure_fit``, ``aggregate_fit``, ``configure_evaluate``,
+  ``aggregate_evaluate``, and ``evaluate_fn``.
- Add ``server_round`` and ``config`` to ``evaluate_fn``:

-  - Flower 0.19: ``def evaluate(parameters: NDArrays) -> Optional[Tuple[float, Dict[str, Scalar]]]:``
-  - Flower 1.0: ``def evaluate(server_round: int, parameters: NDArrays, config: Dict[str, Scalar]) -> Optional[Tuple[float, Dict[str, Scalar]]]:``
+  - Flower 0.19: ``def evaluate(parameters: NDArrays) -> Optional[Tuple[float, Dict[str,
+    Scalar]]]:``
+  - Flower 1.0: ``def evaluate(server_round: int, parameters: NDArrays, config:
+    Dict[str, Scalar]) -> Optional[Tuple[float, Dict[str, Scalar]]]:``
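To see how several of these changes fit together, here is a minimal sketch of a Flower
1.0-style server script. It is illustrative only: the fractions, client counts, and
server address are placeholder values, not recommendations.

.. code-block:: python

    import flwr as fl

    # Flower 1.0: renamed parameters (formerly `fraction_eval` and `min_eval_clients`)
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=0.1,
        fraction_evaluate=0.1,
        min_evaluate_clients=2,
    )

    # Flower 1.0: pass a `ServerConfig` instead of a dictionary
    fl.server.start_server(
        server_address="0.0.0.0:8080",
        config=fl.server.ServerConfig(num_rounds=3, round_timeout=600.0),
        strategy=strategy,
    )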
Custom strategies
~~~~~~~~~~~~~~~~~

-- The type of parameter ``failures`` has changed from ``List[BaseException]`` to ``List[Union[Tuple[ClientProxy, FitRes], BaseException]]`` (in ``aggregate_fit``) and ``List[Union[Tuple[ClientProxy, EvaluateRes], BaseException]]`` (in ``aggregate_evaluate``)
-- The ``Strategy`` method ``evaluate`` now receives the current round of federated learning/evaluation as the first parameter:
+- The type of parameter ``failures`` has changed from ``List[BaseException]`` to
+  ``List[Union[Tuple[ClientProxy, FitRes], BaseException]]`` (in ``aggregate_fit``) and
+  ``List[Union[Tuple[ClientProxy, EvaluateRes], BaseException]]`` (in
+  ``aggregate_evaluate``)
+- The ``Strategy`` method ``evaluate`` now receives the current round of federated
+  learning/evaluation as the first parameter:

-  - Flower 0.19: ``def evaluate(self, parameters: Parameters) -> Optional[Tuple[float, Dict[str, Scalar]]]:``
-  - Flower 1.0: ``def evaluate(self, server_round: int, parameters: Parameters) -> Optional[Tuple[float, Dict[str, Scalar]]]:``
+  - Flower 0.19: ``def evaluate(self, parameters: Parameters) -> Optional[Tuple[float,
+    Dict[str, Scalar]]]:``
+  - Flower 1.0: ``def evaluate(self, server_round: int, parameters: Parameters) ->
+    Optional[Tuple[float, Dict[str, Scalar]]]:``

Optional improvements
---------------------

-Along with the necessary changes above, there are a number of potential improvements that just became possible:
-
-- Remove "placeholder" methods from subclasses of ``Client`` or ``NumPyClient``. If you, for example, use server-side evaluation, then empty placeholder implementations of ``evaluate`` are no longer necessary.
-- Configure the round timeout via ``start_simulation``: ``start_simulation(..., config=flwr.server.ServerConfig(num_rounds=3, round_timeout=600.0), ...)``
+Along with the necessary changes above, there are a number of potential improvements
+that just became possible:

+- Remove "placeholder" methods from subclasses of ``Client`` or ``NumPyClient``. If you,
+  for example, use server-side evaluation, then empty placeholder implementations of
+  ``evaluate`` are no longer necessary.
+- Configure the round timeout via ``start_simulation``: ``start_simulation(...,
+  config=flwr.server.ServerConfig(num_rounds=3, round_timeout=600.0), ...)``

Further help
------------

-Most official `Flower code examples `_ are already updated to Flower 1.0, they can serve as a reference for using the Flower 1.0 API. If there are further questions, `join the Flower Slack `_ and use the channel ``#questions``.
+Most official `Flower code examples
+`_ are already updated to Flower 1.0;
+they can serve as a reference for using the Flower 1.0 API. If there are further
+questions, `join the Flower Slack `_ and use the channel
+``#questions``.
diff --git a/doc/source/how-to-upgrade-to-flower-next.rst b/doc/source/how-to-upgrade-to-flower-next.rst
index f378e92dbba4..e1fc350deb8b 100644
--- a/doc/source/how-to-upgrade-to-flower-next.rst
+++ b/doc/source/how-to-upgrade-to-flower-next.rst
@@ -1,11 +1,13 @@
Upgrade to Flower Next
======================

-Welcome to the migration guide for updating Flower to Flower Next! Whether you're a seasoned user
-or just getting started, this guide will help you smoothly transition your existing setup to take
-advantage of the latest features and improvements in Flower Next, starting from version 1.8.
+Welcome to the migration guide for updating Flower to Flower Next! Whether you're a
+seasoned user or just getting started, this guide will help you smoothly transition your
+existing setup to take advantage of the latest features and improvements in Flower Next,
+starting from version 1.8.

.. note::
+
    This guide shows how to reuse pre-``1.8`` Flower code with minimum code changes by
    using the *compatibility layer* in Flower Next. In another guide, we will show how
    to run Flower Next end-to-end with pure Flower Next APIs.
@@ -18,26 +20,44 @@ Let's dive in!
- https://github.com/jgm/pandoc/issues/3973#issuecomment-337087394

.. |clientapp_link| replace:: ``ClientApp()``
+
.. |serverapp_link| replace:: ``ServerApp()``
+
.. |startclient_link| replace:: ``start_client()``
+
.. |startserver_link| replace:: ``start_server()``
+
.. |startsim_link| replace:: ``start_simulation()``
+
.. |runsim_link| replace:: ``run_simulation()``
+
.. |flowernext_superlink_link| replace:: ``flower-superlink``
+
.. |flowernext_clientapp_link| replace:: ``flower-client-app``
+
.. |flowernext_serverapp_link| replace:: ``flower-server-app``
+
.. |flower_simulation_link| replace:: ``flower-simulation``
+
.. _clientapp_link: ref-api/flwr.client.ClientApp.html
+
+.. _flower_simulation_link: ref-api-cli.html#flower-simulation
+
+.. _flowernext_clientapp_link: ref-api-cli.html#flower-client-app
+
+.. _flowernext_serverapp_link: ref-api-cli.html#flower-server-app
+
+.. _flowernext_superlink_link: ref-api-cli.html#flower-superlink
+
+.. _runsim_link: ref-api/flwr.simulation.run_simulation.html
+
.. _serverapp_link: ref-api/flwr.server.ServerApp.html
+
.. _startclient_link: ref-api/flwr.client.start_client.html
+
.. _startserver_link: ref-api/flwr.server.start_server.html

-.. _startsim_link: ref-api/flwr.simulation.start_simulation.html
-.. _runsim_link: ref-api/flwr.simulation.run_simulation.html
-.. _flowernext_superlink_link: ref-api-cli.html#flower-superlink
-.. _flowernext_clientapp_link: ref-api-cli.html#flower-client-app
-.. _flowernext_serverapp_link: ref-api-cli.html#flower-server-app
-.. _flower_simulation_link: ref-api-cli.html#flower-simulation
+.. _startsim_link: ref-api/flwr.simulation.start_simulation.html

Install update
--------------
@@ -48,19 +68,18 @@ Using pip
~~~~~~~~~

Here's how to update an existing installation of Flower to Flower Next with ``pip``:

.. code-block:: bash
-
    $ python -m pip install -U flwr

or if you need Flower Next with simulation:

.. code-block:: bash
-
-    $ python -m pip install -U "flwr[simulation]"
+    $ python -m pip install -U "flwr[simulation]"

Ensure you set the following version constraint in your ``requirements.txt``

-.. code-block::
+.. code-block::

    # Without simulation support
    flwr>=1.8,<2.0
@@ -81,7 +100,8 @@ or ``pyproject.toml``:
Using Poetry
~~~~~~~~~~~~

-Update the ``flwr`` dependency in ``pyproject.toml`` and then reinstall (don't forget to delete ``poetry.lock`` via ``rm poetry.lock`` before running ``poetry install``).
+Update the ``flwr`` dependency in ``pyproject.toml`` and then reinstall (don't forget to
+delete ``poetry.lock`` via ``rm poetry.lock`` before running ``poetry install``).

Ensure you set the following version constraint in your ``pyproject.toml``:

@@ -100,13 +120,16 @@ Required changes
----------------

In Flower Next, the *infrastructure* and *application layers* have been decoupled.
-Instead of starting a client in code via ``start_client()``, you create a |clientapp_link|_ and start it via the command line.
-Instead of starting a server in code via ``start_server()``, you create a |serverapp_link|_ and start it via the command line.
-The long-running components of server and client are called SuperLink and SuperNode.
-The following non-breaking changes that require manual updates and allow you to run your project both in the traditional way and in the Flower Next way:
+Instead of starting a client in code via ``start_client()``, you create a
+|clientapp_link|_ and start it via the command line. Instead of starting a server in
+code via ``start_server()``, you create a |serverapp_link|_ and start it via the command
+line. The long-running components of server and client are called SuperLink and
+SuperNode. The following non-breaking changes require manual updates and allow you to
+run your project both in the traditional way and in the Flower Next way:

|clientapp_link|_
~~~~~~~~~~~~~~~~~
+
- Wrap your existing client with |clientapp_link|_ instead of launching it via
  |startclient_link|_. Here's an example:

@@ -115,23 +138,25 @@ The following non-breaking changes that require manual updates and allow you to

    # Flower 1.8
    def client_fn(cid: str):
-        return flwr.client.FlowerClient().to_client()
-
+        return flwr.client.FlowerClient().to_client()
+
+
    app = flwr.client.ClientApp(
-        client_fn=client_fn,
+        client_fn=client_fn,
    )

    # Flower 1.7
    if __name__ == "__main__":
        flwr.client.start_client(
-            server_address="127.0.0.1:8080",
-            client=flwr.client.FlowerClient().to_client(),
+            server_address="127.0.0.1:8080",
+            client=flwr.client.FlowerClient().to_client(),
        )

|serverapp_link|_
~~~~~~~~~~~~~~~~~

-- Wrap your existing strategy with |serverapp_link|_ instead of starting the server
-  via |startserver_link|_.
Here's an example:
+
+- Wrap your existing strategy with |serverapp_link|_ instead of starting the server via
+  |startserver_link|_. Here's an example:

.. code-block:: python
    :emphasize-lines: 2,9
@@ -152,13 +177,14 @@ The following non-breaking changes that require manual updates and allow you to

Deployment
~~~~~~~~~~
+
- Run the ``SuperLink`` using |flowernext_superlink_link|_ before running, in sequence,
-  |flowernext_clientapp_link|_ (2x) and |flowernext_serverapp_link|_. There is no need to
-  execute `client.py` and `server.py` as Python scripts.
+  |flowernext_clientapp_link|_ (2x) and |flowernext_serverapp_link|_. There is no need
+  to execute `client.py` and `server.py` as Python scripts.
- Here's an example to start the server without HTTPS (only for prototyping):

.. code-block:: bash
-
    # Start a Superlink
    $ flower-superlink --insecure

@@ -171,8 +197,9 @@ Deployment
    # In yet another terminal window, run the ServerApp (this starts the actual training run)
    $ flower-server-app server:app --insecure

-- Here's another example to start with HTTPS. Use the ``--ssl-ca-certfile``, ``--ssl-certfile``, and ``--ssl-keyfile`` command line
-  options to pass paths to (CA certificate, server certificate, and server private key).
+- Here's another example to start with HTTPS. Use the ``--ssl-ca-certfile``,
+  ``--ssl-certfile``, and ``--ssl-keyfile`` command line options to pass the paths to
+  the CA certificate, server certificate, and server private key.

.. code-block:: bash

@@ -199,6 +226,7 @@ Deployment
Simulation in CLI
~~~~~~~~~~~~~~~~~
+
- Wrap your existing client and strategy with |clientapp_link|_ and |serverapp_link|_,
  respectively. There is no need to use |startsim_link|_ anymore. Here's an example:

@@ -208,13 +236,16 @@ Simulation in CLI

    # Regular Flower client implementation
    class FlowerClient(NumPyClient):
        # ...
+        pass
+

    # Flower 1.8
    def client_fn(cid: str):
-        return FlowerClient().to_client()
-
+        return FlowerClient().to_client()
+
+
    client_app = flwr.client.ClientApp(
-        client_fn=client_fn,
+        client_fn=client_fn,
    )

    server_app = flwr.server.ServerApp(
@@ -226,12 +257,12 @@ Simulation in CLI
    if __name__ == "__main__":
        hist = flwr.simulation.start_simulation(
            num_clients=100,
-            ...
+            # ...
        )

-- Run |flower_simulation_link|_ in CLI and point to the ``server_app`` / ``client_app`` object in the
-  code instead of executing the Python script. Here's an example (assuming the
-  ``server_app`` and ``client_app`` objects are in a ``sim.py`` module):
+- Run |flower_simulation_link|_ in CLI and point to the ``server_app`` / ``client_app``
+  object in the code instead of executing the Python script. Here's an example (assuming
+  the ``server_app`` and ``client_app`` objects are in a ``sim.py`` module):

.. code-block:: bash

@@ -246,8 +277,8 @@ Simulation in CLI
    # Flower 1.7
    $ python sim.py

-- Set default resources for each |clientapp_link|_ using the ``--backend-config`` command
-  line argument instead of setting the ``client_resources`` argument in
+- Set default resources for each |clientapp_link|_ using the ``--backend-config``
+  command line argument instead of setting the ``client_resources`` argument in
  |startsim_link|_. Here's an example:

.. code-block:: bash

@@ -266,26 +297,27 @@ Simulation in CLI
    # Flower 1.7 (in `sim.py`)
    if __name__ == "__main__":
        hist = flwr.simulation.start_simulation(
-            num_clients=100,
-            client_resources = {'num_cpus': 2, "num_gpus": 0.25},
-            ...
+            num_clients=100, client_resources={"num_cpus": 2, "num_gpus": 0.25}, ...
) Simulation in a Notebook ~~~~~~~~~~~~~~~~~~~~~~~~ + - Run |runsim_link|_ in your notebook instead of |startsim_link|_. Here's an example: .. code-block:: python :emphasize-lines: 19,27 - NUM_CLIENTS = + NUM_CLIENTS = 10 # Replace by any integer greater than zero + def client_fn(cid: str): # ... - return FlowerClient().to_client() - + return FlowerClient().to_client() + + client_app = flwr.client.ClientApp( - client_fn=client_fn, + client_fn=client_fn, ) server_app = flwr.server.ServerApp( @@ -297,7 +329,7 @@ Simulation in a Notebook # Flower 1.8 flwr.simulation.run_simulation( - server_app=server_app, + server_app=server_app, client_app=client_app, num_supernodes=NUM_CLIENTS, backend_config=backend_config, @@ -312,18 +344,17 @@ Simulation in a Notebook client_resources=backend_config["client_resources"], ) - Further help ------------ Some official `Flower code examples `_ are already -updated to Flower Next so they can serve as a reference for using the Flower Next API. If there are -further questions, `join the Flower Slack `_ and use the channel ``#questions``. -You can also `participate in Flower Discuss `_ where you can find us -answering questions, or share and learn from others about migrating to Flower Next. +updated to Flower Next so they can serve as a reference for using the Flower Next API. +If there are further questions, `join the Flower Slack `_ +and use the channel ``#questions``. You can also `participate in Flower Discuss +`_ where you can find us answering questions, or share and +learn from others about migrating to Flower Next. .. admonition:: Important - :class: important As we continuously enhance Flower Next at a rapid pace, we'll be periodically updating this guide. Please feel free to share any feedback with us! diff --git a/doc/source/how-to-use-built-in-mods.rst b/doc/source/how-to-use-built-in-mods.rst index 341139175074..970b2055ec23 100644 --- a/doc/source/how-to-use-built-in-mods.rst +++ b/doc/source/how-to-use-built-in-mods.rst @@ -1,14 +1,19 @@ Use Built-in Mods ================= -**Note: This tutorial covers experimental features. The functionality and interfaces may change in future versions.** +**Note: This tutorial covers experimental features. The functionality and interfaces may +change in future versions.** -In this tutorial, we will learn how to utilize built-in mods to augment the behavior of a ``ClientApp``. Mods (sometimes also called Modifiers) allow us to perform operations before and after a task is processed in the ``ClientApp``. +In this tutorial, we will learn how to utilize built-in mods to augment the behavior of +a ``ClientApp``. Mods (sometimes also called Modifiers) allow us to perform operations +before and after a task is processed in the ``ClientApp``. What are Mods? -------------- -A Mod is a callable that wraps around a ``ClientApp``. It can manipulate or inspect the incoming ``Message`` and the resulting outgoing ``Message``. The signature for a ``Mod`` is as follows: +A Mod is a callable that wraps around a ``ClientApp``. It can manipulate or inspect the +incoming ``Message`` and the resulting outgoing ``Message``. The signature for a ``Mod`` +is as follows: .. code-block:: python @@ -51,12 +56,13 @@ Define your client function (``client_fn``) that will be wrapped by the mod(s): def client_fn(cid): # Your client code goes here. - return # your client + return # your client 3. 
Create the ``ClientApp`` with mods
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Create your ``ClientApp`` and pass the mods as a list to the ``mods`` argument. The order in which you provide the mods matters:
+Create your ``ClientApp`` and pass the mods as a list to the ``mods`` argument. The
+order in which you provide the mods matters:

.. code-block:: python

@@ -65,25 +71,31 @@ Create your ``ClientApp`` and pass the mods as a list to the ``mods`` argument.
        mods=[
            example_mod_1,  # Mod 1
            example_mod_2,  # Mod 2
-        ]
+        ],
    )

Order of execution
------------------

-When the ``ClientApp`` runs, the mods are executed in the order they are provided in the list:
+When the ``ClientApp`` runs, the mods are executed in the order they are provided in the
+list:

1. ``example_mod_1`` (outermost mod)
2. ``example_mod_2`` (next mod)
-3. Message handler (core function that handles the incoming ``Message`` and returns the outgoing ``Message``)
+3. Message handler (core function that handles the incoming ``Message`` and returns the
+   outgoing ``Message``)
4. ``example_mod_2`` (on the way back)
5. ``example_mod_1`` (outermost mod on the way back)

-Each mod has a chance to inspect and modify the incoming ``Message`` before passing it to the next mod, and likewise with the outgoing ``Message`` before returning it up the stack.
+Each mod has a chance to inspect and modify the incoming ``Message`` before passing it
+to the next mod, and likewise with the outgoing ``Message`` before returning it up the
+stack.

Conclusion
----------

-By following this guide, you have learned how to effectively use mods to enhance your ``ClientApp``'s functionality. Remember that the order of mods is crucial and affects how the input and output are processed.
+By following this guide, you have learned how to effectively use mods to enhance your
+``ClientApp``'s functionality. Remember that the order of mods is crucial and affects
+how the input and output are processed.

Enjoy building a more robust and flexible ``ClientApp`` with mods!
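To make the mod mechanism more concrete, here is a minimal sketch of a custom mod that
follows the same signature as the built-in mods. It simply logs how long the wrapped
``ClientApp`` call takes; the import paths match Flower 1.8-era APIs and may need
adjusting for other versions.

.. code-block:: python

    import time

    from flwr.client.typing import ClientAppCallable
    from flwr.common import Context, Message


    def timing_mod(msg: Message, ctx: Context, call_next: ClientAppCallable) -> Message:
        """Log the time taken by the wrapped ClientApp call."""
        start = time.time()
        reply = call_next(msg, ctx)  # Pass the incoming message down the stack
        print(f"ClientApp call took {time.time() - start:.2f} seconds")
        return reply

A mod like this can be passed to the ``mods`` argument of a ``ClientApp`` in exactly the
same way as the built-in mods shown above.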
diff --git a/doc/source/how-to-use-differential-privacy.rst b/doc/source/how-to-use-differential-privacy.rst
index 5d4fa3dca1a4..67e54271bb2e 100644
--- a/doc/source/how-to-use-differential-privacy.rst
+++ b/doc/source/how-to-use-differential-privacy.rst
@@ -1,126 +1,151 @@
Use Differential Privacy
-------------------------
-This guide explains how you can utilize differential privacy in the Flower framework. If you are not yet familiar with differential privacy, you can refer to :doc:`explanation-differential-privacy`.
+========================

-.. warning::
+This guide explains how you can utilize differential privacy in the Flower framework. If
+you are not yet familiar with differential privacy, you can refer to
+:doc:`explanation-differential-privacy`.

-  Differential Privacy in Flower is in a preview phase. If you plan to use these features in a production environment with sensitive data, feel free contact us to discuss your requirements and to receive guidance on how to best use these features.
+.. warning::

+    Differential Privacy in Flower is in a preview phase. If you plan to use these
+    features in a production environment with sensitive data, feel free to contact us to
+    discuss your requirements and to receive guidance on how to best use these features.

Central Differential Privacy
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This approach consists of two separate phases: clipping of the updates and adding noise to the aggregated model.
-For the clipping phase, Flower framework has made it possible to decide whether to perform clipping on the server side or the client side.
-
-- **Server-side Clipping**: This approach has the advantage of the server enforcing uniform clipping across all clients' updates and reducing the communication overhead for clipping values. However, it also has the disadvantage of increasing the computational load on the server due to the need to perform the clipping operation for all clients.
-- **Client-side Clipping**: This approach has the advantage of reducing the computational overhead on the server. However, it also has the disadvantage of lacking centralized control, as the server has less control over the clipping process.
+----------------------------

+This approach consists of two separate phases: clipping of the updates and adding noise
+to the aggregated model. For the clipping phase, the Flower framework has made it
+possible to decide whether to perform clipping on the server side or the client side.

+- **Server-side Clipping**: This approach has the advantage of the server enforcing
+  uniform clipping across all clients' updates and reducing the communication overhead
+  for clipping values. However, it also has the disadvantage of increasing the
+  computational load on the server due to the need to perform the clipping operation for
+  all clients.
+- **Client-side Clipping**: This approach has the advantage of reducing the
+  computational overhead on the server. However, it also has the disadvantage of lacking
+  centralized control, as the server has less control over the clipping process.

Server-side Clipping
-^^^^^^^^^^^^^^^^^^^^
-For central DP with server-side clipping, there are two :code:`Strategy` classes that act as wrappers around the actual :code:`Strategy` instance (for example, :code:`FedAvg`).
-The two wrapper classes are :code:`DifferentialPrivacyServerSideFixedClipping` and :code:`DifferentialPrivacyServerSideAdaptiveClipping` for fixed and adaptive clipping.
+~~~~~~~~~~~~~~~~~~~~

-.. image:: ./_static/DP/serversideCDP.png
-  :align: center
-  :width: 700
-  :alt: server side clipping
+For central DP with server-side clipping, there are two ``Strategy`` classes that act as
+wrappers around the actual ``Strategy`` instance (for example, ``FedAvg``). The two
+wrapper classes are ``DifferentialPrivacyServerSideFixedClipping`` and
+``DifferentialPrivacyServerSideAdaptiveClipping`` for fixed and adaptive clipping.

+.. image:: ./_static/DP/serversideCDP.png
+    :align: center
+    :width: 700
+    :alt: server side clipping

-The code sample below enables the :code:`FedAvg` strategy to use server-side fixed clipping using the :code:`DifferentialPrivacyServerSideFixedClipping` wrapper class.
-The same approach can be used with :code:`DifferentialPrivacyServerSideAdaptiveClipping` by adjusting the corresponding input parameters.
+The code sample below enables the ``FedAvg`` strategy to use server-side fixed clipping
+using the ``DifferentialPrivacyServerSideFixedClipping`` wrapper class. The same
+approach can be used with ``DifferentialPrivacyServerSideAdaptiveClipping`` by adjusting
+the corresponding input parameters.

.. code-block:: python

-    from flwr.server.strategy import DifferentialPrivacyClientSideFixedClipping
-
-    # Create the strategy
-    strategy = fl.server.strategy.FedAvg(...)
-
-    # Wrap the strategy with the DifferentialPrivacyServerSideFixedClipping wrapper
-    dp_strategy = DifferentialPrivacyServerSideFixedClipping(
-        strategy,
-        cfg.noise_multiplier,
-        cfg.clipping_norm,
-        cfg.num_sampled_clients,
-    )
+    from flwr.server.strategy import DifferentialPrivacyServerSideFixedClipping

+    # Create the strategy
+    strategy = fl.server.strategy.FedAvg(...)

+    # Wrap the strategy with the DifferentialPrivacyServerSideFixedClipping wrapper
+    dp_strategy = DifferentialPrivacyServerSideFixedClipping(
+        strategy,
+        cfg.noise_multiplier,
+        cfg.clipping_norm,
+        cfg.num_sampled_clients,
+    )

Client-side Clipping
-^^^^^^^^^^^^^^^^^^^^
-For central DP with client-side clipping, the server sends the clipping value to selected clients on each round.
-Clients can use existing Flower :code:`Mods` to perform the clipping.
-Two mods are available for fixed and adaptive client-side clipping: :code:`fixedclipping_mod` and :code:`adaptiveclipping_mod` with corresponding server-side wrappers :code:`DifferentialPrivacyClientSideFixedClipping` and :code:`DifferentialPrivacyClientSideAdaptiveClipping`.
+~~~~~~~~~~~~~~~~~~~~

-.. image:: ./_static/DP/clientsideCDP.png
-  :align: center
-  :width: 800
-  :alt: client side clipping
+For central DP with client-side clipping, the server sends the clipping value to
+selected clients on each round. Clients can use existing Flower ``Mods`` to perform the
+clipping. Two mods are available for fixed and adaptive client-side clipping:
+``fixedclipping_mod`` and ``adaptiveclipping_mod`` with corresponding server-side
+wrappers ``DifferentialPrivacyClientSideFixedClipping`` and
+``DifferentialPrivacyClientSideAdaptiveClipping``.

+.. image:: ./_static/DP/clientsideCDP.png
+    :align: center
+    :width: 800
+    :alt: client side clipping

-The code sample below enables the :code:`FedAvg` strategy to use differential privacy with client-side fixed clipping using both the :code:`DifferentialPrivacyClientSideFixedClipping` wrapper class and, on the client, :code:`fixedclipping_mod`:
+The code sample below enables the ``FedAvg`` strategy to use differential privacy with
+client-side fixed clipping using both the ``DifferentialPrivacyClientSideFixedClipping``
+wrapper class and, on the client, ``fixedclipping_mod``:

.. code-block:: python

-    from flwr.server.strategy import DifferentialPrivacyClientSideFixedClipping
+    from flwr.server.strategy import DifferentialPrivacyClientSideFixedClipping

-    # Create the strategy
-    strategy = fl.server.strategy.FedAvg(...)
+    # Create the strategy
+    strategy = fl.server.strategy.FedAvg(...)

-    # Wrap the strategy with the DifferentialPrivacyClientSideFixedClipping wrapper
-    dp_strategy = DifferentialPrivacyClientSideFixedClipping(
-        strategy,
-        cfg.noise_multiplier,
-        cfg.clipping_norm,
-        cfg.num_sampled_clients,
-    )
+    # Wrap the strategy with the DifferentialPrivacyClientSideFixedClipping wrapper
+    dp_strategy = DifferentialPrivacyClientSideFixedClipping(
+        strategy,
+        cfg.noise_multiplier,
+        cfg.clipping_norm,
+        cfg.num_sampled_clients,
+    )

-In addition to the server-side strategy wrapper, the :code:`ClientApp` needs to configure the matching :code:`fixedclipping_mod` to perform the client-side clipping:
+In addition to the server-side strategy wrapper, the ``ClientApp`` needs to configure
+the matching ``fixedclipping_mod`` to perform the client-side clipping:

..
code-block:: python - from flwr.client.mod import fixedclipping_mod - - # Add fixedclipping_mod to the client-side mods - app = fl.client.ClientApp( - client_fn=client_fn, - mods=[ - fixedclipping_mod, - ] - ) + from flwr.client.mod import fixedclipping_mod + # Add fixedclipping_mod to the client-side mods + app = fl.client.ClientApp( + client_fn=client_fn, + mods=[ + fixedclipping_mod, + ], + ) Local Differential Privacy -~~~~~~~~~~~~~~~~~~~~~~~~~~ -To utilize local differential privacy (DP) and add noise to the client model parameters before transmitting them to the server in Flower, you can use the `LocalDpMod`. The following hyperparameters need to be set: clipping norm value, sensitivity, epsilon, and delta. +-------------------------- + +To utilize local differential privacy (DP) and add noise to the client model parameters +before transmitting them to the server in Flower, you can use the `LocalDpMod`. The +following hyperparameters need to be set: clipping norm value, sensitivity, epsilon, and +delta. .. image:: ./_static/DP/localdp.png - :align: center - :width: 700 - :alt: local DP mod + :align: center + :width: 700 + :alt: local DP mod -Below is a code example that shows how to use :code:`LocalDpMod`: +Below is a code example that shows how to use ``LocalDpMod``: .. code-block:: python - from flwr.client.mod.localdp_mod import LocalDpMod - - # Create an instance of the mod with the required params - local_dp_obj = LocalDpMod( - cfg.clipping_norm, cfg.sensitivity, cfg.epsilon, cfg.delta - ) - # Add local_dp_obj to the client-side mods + from flwr.client.mod.localdp_mod import LocalDpMod - app = fl.client.ClientApp( - client_fn=client_fn, - mods=[local_dp_obj], - ) + # Create an instance of the mod with the required params + local_dp_obj = LocalDpMod(cfg.clipping_norm, cfg.sensitivity, cfg.epsilon, cfg.delta) + # Add local_dp_obj to the client-side mods + app = fl.client.ClientApp( + client_fn=client_fn, + mods=[local_dp_obj], + ) -Please note that the order of mods, especially those that modify parameters, is important when using multiple modifiers. Typically, differential privacy (DP) modifiers should be the last to operate on parameters. +Please note that the order of mods, especially those that modify parameters, is +important when using multiple modifiers. Typically, differential privacy (DP) modifiers +should be the last to operate on parameters. Local Training using Privacy Engines -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For ensuring data instance-level privacy during local model training on the client side, consider leveraging privacy engines such as Opacus and TensorFlow Privacy. For examples of using Flower with these engines, please refer to the Flower examples directory (`Opacus `_, `Tensorflow Privacy `_). \ No newline at end of file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For ensuring data instance-level privacy during local model training on the client side, +consider leveraging privacy engines such as Opacus and TensorFlow Privacy. For examples +of using Flower with these engines, please refer to the Flower examples directory +(`Opacus `_, `Tensorflow +Privacy `_). diff --git a/doc/source/how-to-use-strategies.rst b/doc/source/how-to-use-strategies.rst index 8ac120124951..b4803c6059b3 100644 --- a/doc/source/how-to-use-strategies.rst +++ b/doc/source/how-to-use-strategies.rst @@ -1,19 +1,21 @@ Use strategies ============== -Flower allows full customization of the learning process through the :code:`Strategy` abstraction. 
A number of built-in strategies are provided in the core framework.
+Flower allows full customization of the learning process through the ``Strategy``
+abstraction. A number of built-in strategies are provided in the core framework.

-There are three ways to customize the way Flower orchestrates the learning process on the server side:
-
-* Use an existing strategy, for example, :code:`FedAvg`
-* Customize an existing strategy with callback functions
-* Implement a novel strategy
+There are three ways to customize the way Flower orchestrates the learning process on
+the server side:

+- Use an existing strategy, for example, ``FedAvg``
+- Customize an existing strategy with callback functions
+- Implement a novel strategy

Use an existing strategy
------------------------

-Flower comes with a number of popular federated learning strategies built-in. A built-in strategy can be instantiated as follows:
+Flower comes with a number of popular federated learning strategies built-in. A built-in
+strategy can be instantiated as follows:

.. code-block:: python

@@ -22,7 +24,9 @@ Flower comes with a number of popular federated learning strategies built-in. A
    strategy = fl.server.strategy.FedAvg()
    fl.server.start_server(config=fl.server.ServerConfig(num_rounds=3), strategy=strategy)

-This creates a strategy with all parameters left at their default values and passes it to the :code:`start_server` function. It is usually recommended to adjust a few parameters during instantiation:
+This creates a strategy with all parameters left at their default values and passes it
+to the ``start_server`` function. It is usually recommended to adjust a few parameters
+during instantiation:

.. code-block:: python

@@ -35,22 +39,26 @@ This creates a strategy with all parameters left at their default values and pas
    )
    fl.server.start_server(config=fl.server.ServerConfig(num_rounds=3), strategy=strategy)

-
Customize an existing strategy with callback functions
------------------------------------------------------

-Existing strategies provide several ways to customize their behaviour. Callback functions allow strategies to call user-provided code during execution.
+Existing strategies provide several ways to customize their behaviour. Callback
+functions allow strategies to call user-provided code during execution.

Configuring client fit and client evaluate
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-The server can pass new configuration values to the client each round by providing a function to :code:`on_fit_config_fn`. The provided function will be called by the strategy and must return a dictionary of configuration key values pairs that will be sent to the client.
-It must return a dictionary of arbitrary configuration values :code:`client.fit` and :code:`client.evaluate` functions during each round of federated learning.
+The server can pass new configuration values to the client each round by providing a
+function to ``on_fit_config_fn``. The provided function will be called by the strategy
+and must return a dictionary of configuration key-value pairs that will be sent to the
+client. This dictionary of arbitrary configuration values is passed to the
+``client.fit`` and ``client.evaluate`` functions during each round of federated
+learning.

.. code-block:: python
    import flwr as fl

+
    def get_on_fit_config_fn() -> Callable[[int], Dict[str, str]]:
        """Return a function which returns training configurations."""

@@ -64,6 +72,7 @@ It must return a dictionary of arbitrary configuration values :code:`client.fit

        return fit_config

+
    strategy = fl.server.strategy.FedAvg(
        fraction_fit=0.1,
        min_fit_clients=10,
@@ -72,18 +81,23 @@ It must return a dictionary of arbitrary configuration values :code:`client.fit
    )
    fl.server.start_server(config=fl.server.ServerConfig(num_rounds=3), strategy=strategy)

-The :code:`on_fit_config_fn` can be used to pass arbitrary configuration values from server to client, and potentially change these values each round, for example, to adjust the learning rate.
-The client will receive the dictionary returned by the :code:`on_fit_config_fn` in its own :code:`client.fit()` function.
+The ``on_fit_config_fn`` can be used to pass arbitrary configuration values from server
+to client, and potentially change these values each round, for example, to adjust the
+learning rate. The client will receive the dictionary returned by the
+``on_fit_config_fn`` in its own ``client.fit()`` function.

-Similar to :code:`on_fit_config_fn`, there is also :code:`on_evaluate_config_fn` to customize the configuration sent to :code:`client.evaluate()`
+Similar to ``on_fit_config_fn``, there is also ``on_evaluate_config_fn`` to customize
+the configuration sent to ``client.evaluate()``.

Configuring server-side evaluation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Server-side evaluation can be enabled by passing an evaluation function to :code:`evaluate_fn`.
-
+Server-side evaluation can be enabled by passing an evaluation function to
+``evaluate_fn``, as in the sketch below.
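For instance, the following is a minimal sketch of such an evaluation function. It
assumes a Flower 1.x setup; ``model``, ``x_test``, and ``y_test`` are placeholders for
your own (here, Keras-style) model object and centralized test data.

.. code-block:: python

    from typing import Dict, Optional, Tuple

    from flwr.common import NDArrays, Scalar


    def evaluate_fn(
        server_round: int, parameters: NDArrays, config: Dict[str, Scalar]
    ) -> Optional[Tuple[float, Dict[str, Scalar]]]:
        """Evaluate the aggregated model on a server-side test set."""
        model.set_weights(parameters)  # Placeholder: update your model
        loss, accuracy = model.evaluate(x_test, y_test)  # Placeholder: your test data
        return loss, {"accuracy": accuracy}


    strategy = fl.server.strategy.FedAvg(evaluate_fn=evaluate_fn)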
Implement a novel strategy
--------------------------

-Writing a fully custom strategy is a bit more involved, but it provides the most flexibility. Read the `Implementing Strategies `_ guide to learn more.
+Writing a fully custom strategy is a bit more involved, but it provides the most
+flexibility. Read the `Implementing Strategies `_
+guide to learn more.
diff --git a/doc/source/index.rst b/doc/source/index.rst
index fe996db62ffb..197599d595a8 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -2,15 +2,16 @@ Flower Framework Documentation
==============================

.. meta::
-  :description: Check out the documentation of the main Flower Framework enabling easy Python development for Federated Learning.
-
-Welcome to Flower's documentation. `Flower `_ is a friendly federated learning framework.
+    :description: Check out the documentation of the main Flower Framework enabling easy Python development for Federated Learning.

+Welcome to Flower's documentation. `Flower `_ is a friendly federated
+learning framework.

Join the Flower Community
-------------------------

-The Flower Community is growing quickly - we're a friendly group of researchers, engineers, students, professionals, academics, and other enthusiasts.
+The Flower Community is growing quickly - we're a friendly group of researchers,
+engineers, students, professionals, academics, and other enthusiasts.

.. button-link:: https://flower.ai/join-slack
    :color: primary
    :shadow:

    Join us on Slack

-
Flower Framework
----------------

-The user guide is targeted at researchers and developers who want to use Flower
-to bring existing machine learning workloads into a federated setting. One of
-Flower's design goals was to make this simple. Read on to learn more.
+The user guide is targeted at researchers and developers who want to use Flower to bring
+existing machine learning workloads into a federated setting. One of Flower's design
+goals was to make this simple. Read on to learn more.

Tutorials
~~~~~~~~~

@@ -32,44 +32,50 @@ Tutorials
A learning-oriented series of federated learning tutorials, the best place to start.

.. toctree::
-  :maxdepth: 1
-  :caption: Tutorial
+    :maxdepth: 1
+    :caption: Tutorial

-  tutorial-series-what-is-federated-learning
-  tutorial-series-get-started-with-flower-pytorch
-  tutorial-series-use-a-federated-learning-strategy-pytorch
-  tutorial-series-build-a-strategy-from-scratch-pytorch
-  tutorial-series-customize-the-client-pytorch
+    tutorial-series-what-is-federated-learning
+    tutorial-series-get-started-with-flower-pytorch
+    tutorial-series-use-a-federated-learning-strategy-pytorch
+    tutorial-series-build-a-strategy-from-scratch-pytorch
+    tutorial-series-customize-the-client-pytorch

.. toctree::
-  :maxdepth: 1
-  :caption: Quickstart tutorials
-  :hidden:
-
-  tutorial-quickstart-pytorch
-  tutorial-quickstart-tensorflow
-  tutorial-quickstart-mlx
-  tutorial-quickstart-huggingface
-  tutorial-quickstart-jax
-  tutorial-quickstart-pandas
-  tutorial-quickstart-fastai
-  tutorial-quickstart-pytorch-lightning
-  tutorial-quickstart-scikitlearn
-  tutorial-quickstart-xgboost
-  tutorial-quickstart-android
-  tutorial-quickstart-ios
-
-QUICKSTART TUTORIALS: :doc:`PyTorch ` | :doc:`TensorFlow ` | :doc:`MLX ` | :doc:`🤗 Transformers ` | :doc:`JAX ` | :doc:`Pandas ` | :doc:`fastai ` | :doc:`PyTorch Lightning ` | :doc:`scikit-learn ` | :doc:`XGBoost ` | :doc:`Android ` | :doc:`iOS `
+    :maxdepth: 1
+    :caption: Quickstart tutorials
+    :hidden:
+
+    tutorial-quickstart-pytorch
+    tutorial-quickstart-tensorflow
+    tutorial-quickstart-mlx
+    tutorial-quickstart-huggingface
+    tutorial-quickstart-jax
+    tutorial-quickstart-pandas
+    tutorial-quickstart-fastai
+    tutorial-quickstart-pytorch-lightning
+    tutorial-quickstart-scikitlearn
+    tutorial-quickstart-xgboost
+    tutorial-quickstart-android
+    tutorial-quickstart-ios
+
+QUICKSTART TUTORIALS: :doc:`PyTorch ` | :doc:`TensorFlow
+` | :doc:`MLX ` | :doc:`🤗
+Transformers ` | :doc:`JAX ` |
+:doc:`Pandas ` | :doc:`fastai `
+| :doc:`PyTorch Lightning ` | :doc:`scikit-learn
+` | :doc:`XGBoost ` |
+:doc:`Android ` | :doc:`iOS `

We also made video tutorials for PyTorch:

-.. youtube:: jOmmuzMIQ4c
-  :width: 80%
+.. youtube:: jOmmuzMIQ4c
+    :width: 80%

And TensorFlow:

-.. youtube:: FGTc2TQq7VM
-  :width: 80%
+.. youtube:: FGTc2TQq7VM
+    :width: 80%

How-to guides
~~~~~~~~~~~~~

Problem-oriented how-to guides show step-by-step how to achieve a specific goal.

..
toctree:: - :maxdepth: 1 - :caption: How-to guides - - how-to-install-flower - how-to-configure-clients - how-to-use-strategies - how-to-implement-strategies - how-to-aggregate-evaluation-results - how-to-save-and-load-model-checkpoints - how-to-run-simulations - how-to-monitor-simulation - how-to-configure-logging - how-to-enable-ssl-connections - how-to-use-built-in-mods - how-to-use-differential-privacy - how-to-authenticate-supernodes - docker/index - how-to-upgrade-to-flower-1.0 - how-to-upgrade-to-flower-next + :maxdepth: 1 + :caption: How-to guides + + how-to-install-flower + how-to-configure-clients + how-to-use-strategies + how-to-implement-strategies + how-to-aggregate-evaluation-results + how-to-save-and-load-model-checkpoints + how-to-run-simulations + how-to-monitor-simulation + how-to-configure-logging + how-to-enable-ssl-connections + how-to-use-built-in-mods + how-to-use-differential-privacy + how-to-authenticate-supernodes + docker/index + how-to-upgrade-to-flower-1.0 + how-to-upgrade-to-flower-next .. toctree:: - :maxdepth: 1 - :caption: Legacy example guides + :maxdepth: 1 + :caption: Legacy example guides - example-pytorch-from-centralized-to-federated - example-fedbn-pytorch-from-centralized-to-federated + example-pytorch-from-centralized-to-federated + example-fedbn-pytorch-from-centralized-to-federated Explanations ~~~~~~~~~~~~ -Understanding-oriented concept guides explain and discuss key topics and underlying ideas behind Flower and collaborative AI. +Understanding-oriented concept guides explain and discuss key topics and underlying +ideas behind Flower and collaborative AI. .. toctree:: - :maxdepth: 1 - :caption: Explanations + :maxdepth: 1 + :caption: Explanations - explanation-federated-evaluation - explanation-differential-privacy - explanation-flower-architecture + explanation-federated-evaluation + explanation-differential-privacy + explanation-flower-architecture References ~~~~~~~~~~ @@ -123,71 +130,77 @@ References Information-oriented API reference and other reference material. .. autosummary:: - :toctree: ref-api - :template: autosummary/module.rst - :caption: API reference - :recursive: + :toctree: ref-api + :template: autosummary/module.rst + :caption: API reference + :recursive: - flwr + flwr .. toctree:: - :maxdepth: 2 + :maxdepth: 2 - ref-api-cli + ref-api-cli .. toctree:: - :maxdepth: 1 - :caption: Reference docs - - ref-example-projects - ref-telemetry - ref-changelog - ref-faq + :maxdepth: 1 + :caption: Reference docs + ref-example-projects + ref-telemetry + ref-changelog + ref-faq Contributor docs ---------------- -The Flower community welcomes contributions. The following docs are intended to help along the way. - +The Flower community welcomes contributions. The following docs are intended to help +along the way. .. toctree:: - :maxdepth: 1 - :caption: Contributor tutorials + :maxdepth: 1 + :caption: Contributor tutorials - contributor-tutorial-contribute-on-github - contributor-tutorial-get-started-as-a-contributor + contributor-tutorial-contribute-on-github + contributor-tutorial-get-started-as-a-contributor .. 
toctree:: - :maxdepth: 1 - :caption: Contributor how-to guides + :maxdepth: 1 + :caption: Contributor how-to guides - contributor-how-to-install-development-versions - contributor-how-to-set-up-a-virtual-env - contributor-how-to-develop-in-vscode-dev-containers - contributor-how-to-write-documentation - contributor-how-to-release-flower - contributor-how-to-contribute-translations - contributor-how-to-build-docker-images + contributor-how-to-install-development-versions + contributor-how-to-set-up-a-virtual-env + contributor-how-to-develop-in-vscode-dev-containers + contributor-how-to-write-documentation + contributor-how-to-release-flower + contributor-how-to-contribute-translations + contributor-how-to-build-docker-images .. toctree:: - :maxdepth: 1 - :caption: Contributor explanations + :maxdepth: 1 + :caption: Contributor explanations - contributor-explanation-public-and-private-apis + contributor-explanation-public-and-private-apis .. toctree:: - :maxdepth: 1 - :caption: Contributor references + :maxdepth: 1 + :caption: Contributor references + + fed/index + contributor-ref-good-first-contributions + contributor-ref-secure-aggregation-protocols + +.. + Indices and tables - fed/index - contributor-ref-good-first-contributions - contributor-ref-secure-aggregation-protocols +.. + ------------------ +.. + * :ref:`genindex` -.. Indices and tables -.. ------------------ +.. + * :ref:`modindex` -.. * :ref:`genindex` -.. * :ref:`modindex` -.. * :ref:`search` +.. + * :ref:`search` diff --git a/doc/source/ref-api-cli.rst b/doc/source/ref-api-cli.rst index 95664b2f490a..e95132bbadba 100644 --- a/doc/source/ref-api-cli.rst +++ b/doc/source/ref-api-cli.rst @@ -4,64 +4,66 @@ Flower CLI reference .. _flwr-apiref: flwr CLI -~~~~~~~~ +-------- .. click:: flwr.cli.app:typer_click_object - :prog: flwr - :nested: full + :prog: flwr + :nested: full .. _flower-simulation-apiref: flower-simulation -~~~~~~~~~~~~~~~~~ +----------------- .. argparse:: - :module: flwr.simulation.run_simulation - :func: _parse_args_run_simulation - :prog: flower-simulation + :module: flwr.simulation.run_simulation + :func: _parse_args_run_simulation + :prog: flower-simulation .. _flower-superlink-apiref: flower-superlink -~~~~~~~~~~~~~~~~ +---------------- .. argparse:: - :module: flwr.server.app - :func: _parse_args_run_superlink - :prog: flower-superlink + :module: flwr.server.app + :func: _parse_args_run_superlink + :prog: flower-superlink .. _flower-supernode-apiref: flower-supernode -~~~~~~~~~~~~~~~~~ +---------------- .. argparse:: - :module: flwr.client.supernode.app - :func: _parse_args_run_supernode - :prog: flower-supernode + :module: flwr.client.supernode.app + :func: _parse_args_run_supernode + :prog: flower-supernode .. _flower-server-app-apiref: flower-server-app -~~~~~~~~~~~~~~~~~ +----------------- .. note:: - Note that since version :code:`1.11.0`, :code:`flower-server-app` no longer supports passing a reference to a `ServerApp` attribute. - Instead, you need to pass the path to Flower app via the argument :code:`--app`. - This is the path to a directory containing a `pyproject.toml`. - You can create a valid Flower app by executing :code:`flwr new` and following the prompt. + + Note that since version ``1.11.0``, ``flower-server-app`` no longer supports passing + a reference to a `ServerApp` attribute. Instead, you need to pass the path to Flower + app via the argument ``--app``. This is the path to a directory containing a + `pyproject.toml`. 
You can create a valid Flower app by executing ``flwr new`` and
+    following the prompt.

.. argparse::
-   :module: flwr.server.run_serverapp
-   :func: _parse_args_run_server_app
-   :prog: flower-server-app
+    :module: flwr.server.run_serverapp
+    :func: _parse_args_run_server_app
+    :prog: flower-server-app

.. _flower-superexec-apiref:

flower-superexec
-~~~~~~~~~~~~~~~~~
+----------------

.. argparse::
-   :module: flwr.superexec.app
-   :func: _parse_args_run_superexec
-   :prog: flower-superexec
\ No newline at end of file
+    :module: flwr.superexec.app
+    :func: _parse_args_run_superexec
+    :prog: flower-superexec
diff --git a/doc/source/ref-example-projects.rst b/doc/source/ref-example-projects.rst
index 597e3a596c51..4f0a3014e1d4 100644
--- a/doc/source/ref-example-projects.rst
+++ b/doc/source/ref-example-projects.rst
@@ -1,48 +1,52 @@
Example projects
================

-Flower comes with a number of usage examples. The examples demonstrate how
-Flower can be used to federate different kinds of existing machine learning
-pipelines, usually leveraging popular machine learning frameworks such as
-`PyTorch `_ or
-`TensorFlow `_.
+Flower comes with a number of usage examples. The examples demonstrate how Flower can be
+used to federate different kinds of existing machine learning pipelines, usually
+leveraging popular machine learning frameworks such as `PyTorch `_
+or `TensorFlow `_.

The following examples are available as standalone projects.

+
Quickstart TensorFlow/Keras
---------------------------

-The TensorFlow/Keras quickstart example shows CIFAR-10 image classification
-with MobileNetV2:
+The TensorFlow/Keras quickstart example shows CIFAR-10 image classification with
+MobileNetV2:

-- `Quickstart TensorFlow (Code) `_
+- `Quickstart TensorFlow (Code)
+  `_
- :doc:`Quickstart TensorFlow (Tutorial) `
-- `Quickstart TensorFlow (Blog Post) `_
-
+- `Quickstart TensorFlow (Blog Post)
+  `_

Quickstart PyTorch
------------------

-The PyTorch quickstart example shows CIFAR-10 image classification
-with a simple Convolutional Neural Network:
+The PyTorch quickstart example shows CIFAR-10 image classification with a simple
+Convolutional Neural Network:

-- `Quickstart PyTorch (Code) `_
+- `Quickstart PyTorch (Code)
+  `_
- :doc:`Quickstart PyTorch (Tutorial) `

-
PyTorch: From Centralized To Federated
--------------------------------------

This example shows how a regular PyTorch project can be federated using Flower:

-- `PyTorch: From Centralized To Federated (Code) `_
-- :doc:`PyTorch: From Centralized To Federated (Tutorial) `
-
+- `PyTorch: From Centralized To Federated (Code)
+  `_
+- :doc:`PyTorch: From Centralized To Federated (Tutorial)
+  `

Federated Learning on Raspberry Pi and Nvidia Jetson
----------------------------------------------------

-This example shows how Flower can be used to build a federated learning system that run across Raspberry Pi and Nvidia Jetson:
-
-- `Federated Learning on Raspberry Pi and Nvidia Jetson (Code) `_
-- `Federated Learning on Raspberry Pi and Nvidia Jetson (Blog Post) `_
+This example shows how Flower can be used to build a federated learning system that runs
+across Raspberry Pi and Nvidia Jetson:

+- `Federated Learning on Raspberry Pi and Nvidia Jetson (Code)
+  `_
+- `Federated Learning on Raspberry Pi and Nvidia Jetson (Blog Post)
+  `_
questions about Federated Learning with Flower. +This page collects answers to commonly asked questions about Federated Learning with +Flower. .. dropdown:: :fa:`eye,mr-1` Can Flower run on Jupyter Notebooks / Google Colab? diff --git a/doc/source/tutorial-quickstart-android.rst b/doc/source/tutorial-quickstart-android.rst index 9177236d5a7c..f2691203078c 100644 --- a/doc/source/tutorial-quickstart-android.rst +++ b/doc/source/tutorial-quickstart-android.rst @@ -1,12 +1,12 @@ .. _quickstart-android: - Quickstart Android ================== .. meta:: - :description: Read this Federated Learning quickstart tutorial for creating an Android app using Flower. + :description: Read this Federated Learning quickstart tutorial for creating an Android app using Flower. Let's build a federated learning system using TFLite and Flower on Android! -Please refer to the `full code example `_ to learn more. +Please refer to the `full code example +`_ to learn more. diff --git a/doc/source/tutorial-quickstart-fastai.rst b/doc/source/tutorial-quickstart-fastai.rst index e42328e6f712..d52c570b0195 100644 --- a/doc/source/tutorial-quickstart-fastai.rst +++ b/doc/source/tutorial-quickstart-fastai.rst @@ -1,113 +1,110 @@ .. _quickstart-fastai: -################### - Quickstart fastai -################### +Quickstart fastai +================= -In this federated learning tutorial we will learn how to train a -SqueezeNet model on MNIST using Flower and fastai. It is recommended to -create a virtual environment and run everything within a -:doc:`virtualenv `. +In this federated learning tutorial we will learn how to train a SqueezeNet model on +MNIST using Flower and fastai. It is recommended to create a virtual environment and run +everything within a :doc:`virtualenv `. Then, clone the code example directly from GitHub: -.. code:: shell +.. code-block:: shell - git clone --depth=1 https://github.com/adap/flower.git _tmp \ - && mv _tmp/examples/quickstart-fastai . \ - && rm -rf _tmp && cd quickstart-fastai + git clone --depth=1 https://github.com/adap/flower.git _tmp \ + && mv _tmp/examples/quickstart-fastai . \ + && rm -rf _tmp && cd quickstart-fastai -This will create a new directory called `quickstart-fastai` containing -the following files: +This will create a new directory called `quickstart-fastai` containing the following +files: -.. code:: shell +.. code-block:: shell - quickstart-fastai - ├── fastai_example - │ ├── client_app.py # Defines your ClientApp - │ ├── server_app.py # Defines your ServerApp - │ └── task.py # Defines your model, training and data loading - ├── pyproject.toml # Project metadata like dependencies and configs - └── README.md + quickstart-fastai + ├── fastai_example + │ ├── client_app.py # Defines your ClientApp + │ ├── server_app.py # Defines your ServerApp + │ └── task.py # Defines your model, training and data loading + ├── pyproject.toml # Project metadata like dependencies and configs + └── README.md Next, activate your environment, then run: -.. code:: shell +.. code-block:: shell - # Navigate to the example directory - $ cd path/to/quickstart-fastai + # Navigate to the example directory + $ cd path/to/quickstart-fastai - # Install project and dependencies - $ pip install -e . + # Install project and dependencies + $ pip install -e . -This example by default runs the Flower Simulation Engine, creating a -federation of 10 nodes using `FedAvg +This example by default runs the Flower Simulation Engine, creating a federation of 10 +nodes using `FedAvg `_ -as the aggregation strategy. 
The dataset will be partitioned using
-Flower Dataset's `IidPartitioner
+as the aggregation strategy. The dataset will be partitioned using Flower Datasets'
+`IidPartitioner
 `_.

Let's run the project:

-.. code:: shell
+.. code-block:: shell

-   # Run with default arguments
-   $ flwr run .
+    # Run with default arguments
+    $ flwr run .

With default arguments you will see an output like this one:

-.. code:: shell
-
-   Loading project configuration...
-   Success
-   INFO :      Starting Flower ServerApp, config: num_rounds=3, no round_timeout
-   INFO :
-   INFO :      [INIT]
-   INFO :      Using initial global parameters provided by strategy
-   INFO :      Starting evaluation of initial global parameters
-   INFO :      Evaluation returned no results (`None`)
-   INFO :
-   INFO :      [ROUND 1]
-   INFO :      configure_fit: strategy sampled 5 clients (out of 10)
-   INFO :      aggregate_fit: received 5 results and 0 failures
-   WARNING :   No fit_metrics_aggregation_fn provided
-   INFO :      configure_evaluate: strategy sampled 5 clients (out of 10)
-   INFO :      aggregate_evaluate: received 5 results and 0 failures
-   INFO :
-   INFO :      [ROUND 2]
-   INFO :      configure_fit: strategy sampled 5 clients (out of 10)
-   INFO :      aggregate_fit: received 5 results and 0 failures
-   INFO :      configure_evaluate: strategy sampled 5 clients (out of 10)
-   INFO :      aggregate_evaluate: received 5 results and 0 failures
-   INFO :
-   INFO :      [ROUND 3]
-   INFO :      configure_fit: strategy sampled 5 clients (out of 10)
-   INFO :      aggregate_fit: received 5 results and 0 failures
-   INFO :      configure_evaluate: strategy sampled 5 clients (out of 10)
-   INFO :      aggregate_evaluate: received 5 results and 0 failures
-   INFO :
-   INFO :      [SUMMARY]
-   INFO :      Run finished 3 round(s) in 143.02s
-   INFO :      History (loss, distributed):
-   INFO :              round 1: 2.699497365951538
-   INFO :              round 2: 0.9549586296081543
-   INFO :              round 3: 0.6627192616462707
-   INFO :      History (metrics, distributed, evaluate):
-   INFO :      {'accuracy': [(1, 0.09766666889190674),
-   INFO :                    (2, 0.6948333323001862),
-   INFO :                    (3, 0.7721666693687439)]}
-   INFO :
-
-You can also override the parameters defined in the
-``[tool.flwr.app.config]`` section in ``pyproject.toml`` like this:
-
-.. code:: shell
-
-   # Override some arguments
-   $ flwr run . --run-config num-server-rounds=5
+.. code-block:: shell
+
+    Loading project configuration... 
+    Success
+    INFO :      Starting Flower ServerApp, config: num_rounds=3, no round_timeout
+    INFO :
+    INFO :      [INIT]
+    INFO :      Using initial global parameters provided by strategy
+    INFO :      Starting evaluation of initial global parameters
+    INFO :      Evaluation returned no results (`None`)
+    INFO :
+    INFO :      [ROUND 1]
+    INFO :      configure_fit: strategy sampled 5 clients (out of 10)
+    INFO :      aggregate_fit: received 5 results and 0 failures
+    WARNING :   No fit_metrics_aggregation_fn provided
+    INFO :      configure_evaluate: strategy sampled 5 clients (out of 10)
+    INFO :      aggregate_evaluate: received 5 results and 0 failures
+    INFO :
+    INFO :      [ROUND 2]
+    INFO :      configure_fit: strategy sampled 5 clients (out of 10)
+    INFO :      aggregate_fit: received 5 results and 0 failures
+    INFO :      configure_evaluate: strategy sampled 5 clients (out of 10)
+    INFO :      aggregate_evaluate: received 5 results and 0 failures
+    INFO :
+    INFO :      [ROUND 3]
+    INFO :      configure_fit: strategy sampled 5 clients (out of 10)
+    INFO :      aggregate_fit: received 5 results and 0 failures
+    INFO :      configure_evaluate: strategy sampled 5 clients (out of 10)
+    INFO :      aggregate_evaluate: received 5 results and 0 failures
+    INFO :
+    INFO :      [SUMMARY]
+    INFO :      Run finished 3 round(s) in 143.02s
+    INFO :      History (loss, distributed):
+    INFO :              round 1: 2.699497365951538
+    INFO :              round 2: 0.9549586296081543
+    INFO :              round 3: 0.6627192616462707
+    INFO :      History (metrics, distributed, evaluate):
+    INFO :      {'accuracy': [(1, 0.09766666889190674),
+    INFO :                    (2, 0.6948333323001862),
+    INFO :                    (3, 0.7721666693687439)]}
+    INFO :
+
+You can also override the parameters defined in the ``[tool.flwr.app.config]`` section
+in ``pyproject.toml`` like this:
+
+.. code-block:: shell
+
+    # Override some arguments
+    $ flwr run . --run-config num-server-rounds=5

 .. note::

-   Check the `source code
-   `_
-   of this tutorial in ``examples/quickstart-fasai`` in the Flower
-   GitHub repository.
+    Check the `source code
+    `_ of this
+    tutorial in ``examples/quickstart-fastai`` in the Flower GitHub repository.
diff --git a/doc/source/tutorial-quickstart-huggingface.rst b/doc/source/tutorial-quickstart-huggingface.rst
index e5caa3b19dd6..3c9d3981e587 100644
--- a/doc/source/tutorial-quickstart-huggingface.rst
+++ b/doc/source/tutorial-quickstart-huggingface.rst
@@ -1,419 +1,385 @@
 .. _quickstart-huggingface:

-###########################
- Quickstart 🤗 Transformers
-###########################
+Quickstart 🤗 Transformers
+==========================

-In this federated learning tutorial we will learn how to train a large
-language model (LLM) on the `IMDB
-`_ dataset using
-Flower and the 🤗 Hugging Face Transformers library. It is recommended to
-create a virtual environment and run everything within a
-:doc:`virtualenv `.
+In this federated learning tutorial we will learn how to train a large language model
+(LLM) on the `IMDB `_ dataset using
+Flower and the 🤗 Hugging Face Transformers library. It is recommended to create a
+virtual environment and run everything within a :doc:`virtualenv
+`.

-Let's use ``flwr new`` to create a complete Flower+🤗 Hugging Face
-project. It will generate all the files needed to run, by default with
-the Flower Simulation Engine, a federation of 10 nodes using |fedavg|_
-The dataset will be partitioned using |flowerdatasets|_'s
-|iidpartitioner|_.
+Let's use ``flwr new`` to create a complete Flower+🤗 Hugging Face project. 
It will
+generate all the files needed to run, by default with the Flower Simulation Engine, a
+federation of 10 nodes using |fedavg|_. The dataset will be partitioned using
+|flowerdatasets|_'s |iidpartitioner|_.

-Now that we have a rough idea of what this example is about, let's get
-started. First, install Flower in your new environment:
+Now that we have a rough idea of what this example is about, let's get started. First,
+install Flower in your new environment:

-.. code:: shell
+.. code-block:: shell

-   # In a new Python environment
-   $ pip install flwr
+    # In a new Python environment
+    $ pip install flwr

-Then, run the command below. You will be prompted to select one of the
-available templates (choose ``HuggingFace``), give a name to your
-project, and type in your developer name:
+Then, run the command below. You will be prompted to select one of the available
+templates (choose ``HuggingFace``), give a name to your project, and type in your
+developer name:

-.. code:: shell
+.. code-block:: shell

-   $ flwr new
+    $ flwr new

-After running it you'll notice a new directory with your project name
-has been created. It should have the following structure:
+After running it you'll notice a new directory with your project name has been created.
+It should have the following structure:

-.. code:: shell
+.. code-block:: shell

-   
-   ├── 
-   │   ├── __init__.py
-   │   ├── client_app.py   # Defines your ClientApp
-   │   ├── server_app.py   # Defines your ServerApp
-   │   └── task.py         # Defines your model, training and data loading
-   ├── pyproject.toml      # Project metadata like dependencies and configs
-   └── README.md
+    
+    ├── 
+    │   ├── __init__.py
+    │   ├── client_app.py   # Defines your ClientApp
+    │   ├── server_app.py   # Defines your ServerApp
+    │   └── task.py         # Defines your model, training and data loading
+    ├── pyproject.toml      # Project metadata like dependencies and configs
+    └── README.md

-If you haven't yet installed the project and its dependencies, you can
-do so by:
+If you haven't yet installed the project and its dependencies, you can do so by:

-.. code:: shell
+.. code-block:: shell

-   # From the directory where your pyproject.toml is
-   $ pip install -e .
+    # From the directory where your pyproject.toml is
+    $ pip install -e .

 To run the project, do:

-.. code:: shell
+.. code-block:: shell

-   # Run with default arguments
-   $ flwr run .
+    # Run with default arguments
+    $ flwr run .

 With default arguments you will see an output like this one:

-.. code:: shell
-
-   Loading project configuration... 
- Success - INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout - INFO : - INFO : [INIT] - INFO : Using initial global parameters provided by strategy - INFO : Starting evaluation of initial global parameters - INFO : Evaluation returned no results (`None`) - INFO : - INFO : [ROUND 1] - INFO : configure_fit: strategy sampled 2 clients (out of 10) - INFO : aggregate_fit: received 2 results and 0 failures - WARNING : No fit_metrics_aggregation_fn provided - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - WARNING : No evaluate_metrics_aggregation_fn provided - INFO : - INFO : [ROUND 2] - INFO : configure_fit: strategy sampled 5 clients (out of 10) - INFO : aggregate_fit: received 5 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [ROUND 3] - INFO : configure_fit: strategy sampled 5 clients (out of 10) - INFO : aggregate_fit: received 5 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [SUMMARY] - INFO : Run finished 3 round(s) in 249.11s - INFO : History (loss, distributed): - INFO : round 1: 0.02111011856794357 - INFO : round 2: 0.019722302150726317 - INFO : round 3: 0.018227258533239362 - INFO : +.. code-block:: shell + + Loading project configuration... + Success + INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout + INFO : + INFO : [INIT] + INFO : Using initial global parameters provided by strategy + INFO : Starting evaluation of initial global parameters + INFO : Evaluation returned no results (`None`) + INFO : + INFO : [ROUND 1] + INFO : configure_fit: strategy sampled 2 clients (out of 10) + INFO : aggregate_fit: received 2 results and 0 failures + WARNING : No fit_metrics_aggregation_fn provided + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + WARNING : No evaluate_metrics_aggregation_fn provided + INFO : + INFO : [ROUND 2] + INFO : configure_fit: strategy sampled 5 clients (out of 10) + INFO : aggregate_fit: received 5 results and 0 failures + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + INFO : + INFO : [ROUND 3] + INFO : configure_fit: strategy sampled 5 clients (out of 10) + INFO : aggregate_fit: received 5 results and 0 failures + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + INFO : + INFO : [SUMMARY] + INFO : Run finished 3 round(s) in 249.11s + INFO : History (loss, distributed): + INFO : round 1: 0.02111011856794357 + INFO : round 2: 0.019722302150726317 + INFO : round 3: 0.018227258533239362 + INFO : You can also run the project with GPU as follows: -.. code:: shell - - # Run with default arguments - $ flwr run . localhost-gpu - -This will use the default arguments where each ``ClientApp`` will use 2 -CPUs and at most 4 ``ClientApp``\s will run in a given GPU. - -You can also override the parameters defined in the -``[tool.flwr.app.config]`` section in ``pyproject.toml`` like this: - -.. code:: shell - - # Override some arguments - $ flwr run . 
--run-config "num-server-rounds=5 fraction-fit=0.2" - -What follows is an explanation of each component in the project you just -created: dataset partition, the model, defining the ``ClientApp`` and -defining the ``ServerApp``. - -********** - The Data -********** - -This tutorial uses |flowerdatasets|_ to easily download and partition -the `IMDB `_ dataset. -In this example you'll make use of the |iidpartitioner|_ to generate -``num_partitions`` partitions. You can choose |otherpartitioners|_ -available in Flower Datasets. To tokenize the text, we will also load -the tokenizer from the pre-trained Transformer model that we'll use -during training - more on that in the next section. Each ``ClientApp`` -will call this function to create dataloaders with the data that -correspond to their data partition. - -.. code:: python - - partitioner = IidPartitioner(num_partitions=num_partitions) - fds = FederatedDataset( - dataset="stanfordnlp/imdb", - partitioners={"train": partitioner}, - ) - partition = fds.load_partition(partition_id) - # Divide data: 80% train, 20% test - partition_train_test = partition.train_test_split(test_size=0.2, seed=42) - - tokenizer = AutoTokenizer.from_pretrained(model_name) - - def tokenize_function(examples): - return tokenizer( - examples["text"], truncation=True, add_special_tokens=True, max_length=512 - ) - - partition_train_test = partition_train_test.map(tokenize_function, batched=True) - partition_train_test = partition_train_test.remove_columns("text") - partition_train_test = partition_train_test.rename_column("label", "labels") - - data_collator = DataCollatorWithPadding(tokenizer=tokenizer) - trainloader = DataLoader( - partition_train_test["train"], - shuffle=True, - batch_size=32, - collate_fn=data_collator, - ) - - testloader = DataLoader( - partition_train_test["test"], batch_size=32, collate_fn=data_collator - ) - -*********** - The Model -*********** - -We will leverage 🤗 Hugging Face to federate the training of language -models over multiple clients using Flower. More specifically, we will -fine-tune a pre-trained Transformer model (|berttiny|_) for sequence -classification over the dataset of IMDB ratings. The end goal is to -detect if a movie rating is positive or negative. If you have access to -larger GPUs, feel free to use larger models! - -.. code:: python - - net = AutoModelForSequenceClassification.from_pretrained( - model_name, num_labels=num_labels - ) - -Note that here, ``model_name`` is a string that will be loaded from the -``Context`` in the ClientApp and ServerApp. - -In addition to loading the pretrained model weights and architecture, we -also include two utility functions to perform both training (i.e. -``train()``) and evaluation (i.e. ``test()``) using the above model. -These functions should look fairly familiar if you have some prior -experience with PyTorch. Note these functions do not have anything -specific to Flower. That being said, the training function will normally -be called, as we'll see later, from a Flower client passing its own -data. In summary, your clients can use standard training/testing -functions to perform local training or evaluation: - -.. 
code:: python - - def train(net, trainloader, epochs, device): - optimizer = AdamW(net.parameters(), lr=5e-5) - net.train() - for _ in range(epochs): - for batch in trainloader: - batch = {k: v.to(device) for k, v in batch.items()} - outputs = net(**batch) - loss = outputs.loss - loss.backward() - optimizer.step() - optimizer.zero_grad() - - - def test(net, testloader, device): - metric = load_metric("accuracy") - loss = 0 - net.eval() - for batch in testloader: - batch = {k: v.to(device) for k, v in batch.items()} - with torch.no_grad(): - outputs = net(**batch) - logits = outputs.logits - loss += outputs.loss.item() - predictions = torch.argmax(logits, dim=-1) - metric.add_batch(predictions=predictions, references=batch["labels"]) - loss /= len(testloader.dataset) - accuracy = metric.compute()["accuracy"] - return loss, accuracy - -*************** - The ClientApp -*************** - -The main changes we have to make to use 🤗 Hugging Face with Flower will -be found in the ``get_weights()`` and ``set_weights()`` functions. Under -the hood, the ``transformers`` library uses PyTorch, which means we can -reuse the ``get_weights()`` and ``set_weights()`` code that we defined -in the :doc:`Quickstart PyTorch ` tutorial. -As a reminder, in ``get_weights()``, PyTorch model parameters are -extracted and represented as a list of NumPy arrays. The -``set_weights()`` function that's the opposite: given a list of NumPy -arrays it applies them to an existing PyTorch model. Doing this in -fairly easy in PyTorch. +.. code-block:: shell -.. note:: - - The specific implementation of ``get_weights()`` and - ``set_weights()`` depends on the type of models you use. The ones - shown below work for a wide range of PyTorch models but you might - need to adjust them if you have more exotic model architectures. - -.. code:: python - - def get_weights(net): - return [val.cpu().numpy() for _, val in net.state_dict().items()] - - def set_weights(net, parameters): - params_dict = zip(net.state_dict().keys(), parameters) - state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict}) - net.load_state_dict(state_dict, strict=True) - -The rest of the functionality is directly inspired by the centralized -case. The ``fit()`` method in the client trains the model using the -local dataset. Similarly, the ``evaluate()`` method is used to evaluate -the model received on a held-out validation set that the client might -have: - -.. code:: python - - class FlowerClient(NumPyClient): - def __init__(self, net, trainloader, testloader, local_epochs): - self.net = net - self.trainloader = trainloader - self.testloader = testloader - self.local_epochs = local_epochs - self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - self.net.to(self.device) - - def fit(self, parameters, config): - set_weights(self.net, parameters) - train(self.net, self.trainloader, epochs=self.local_epochs, device=self.device) - return get_weights(self.net), len(self.trainloader), {} - - def evaluate(self, parameters, config): - set_weights(self.net, parameters) - loss, accuracy = test(self.net, self.testloader, self.device) - return float(loss), len(self.testloader), {"accuracy": accuracy} - -Finally, we can construct a ``ClientApp`` using the ``FlowerClient`` -defined above by means of a ``client_fn()`` callback. Note that the -`context` enables you to get access to hyperparemeters defined in your -``pyproject.toml`` to configure the run. 
In this tutorial we access the
-``local-epochs`` setting to control the number of epochs a ``ClientApp``
-will perform when running the ``fit()`` method. You could define
-additional hyperparameters in ``pyproject.toml`` and access them here.
+    # Run with default arguments
+    $ flwr run . localhost-gpu

-.. code:: python
+This will use the default arguments where each ``ClientApp`` will use 2 CPUs and at most
+4 ``ClientApp``\s will run in a given GPU.

-   def client_fn(context: Context):
+You can also override the parameters defined in the ``[tool.flwr.app.config]`` section
+in ``pyproject.toml`` like this:

-       # Get this client's dataset partition
-       partition_id = context.node_config["partition-id"]
-       num_partitions = context.node_config["num-partitions"]
-       model_name = context.run_config["model-name"]
-       trainloader, valloader = load_data(partition_id, num_partitions, model_name)
+.. code-block:: shell

-       # Load model
-       num_labels = context.run_config["num-labels"]
-       net = AutoModelForSequenceClassification.from_pretrained(
-           model_name, num_labels=num_labels
-       )
+    # Override some arguments
+    $ flwr run . --run-config "num-server-rounds=5 fraction-fit=0.2"

-       local_epochs = context.run_config["local-epochs"]
-
-       # Return Client instance
-       return FlowerClient(net, trainloader, valloader, local_epochs).to_client()
-
-   # Flower ClientApp
-   app = ClientApp(client_fn)
+
+What follows is an explanation of each component in the project you just created: the
+dataset partition, the model, the ``ClientApp``, and the ``ServerApp``.
+
+The Data
+--------
+
+This tutorial uses |flowerdatasets|_ to easily download and partition the `IMDB
+`_ dataset. In this example you'll
+make use of the |iidpartitioner|_ to generate ``num_partitions`` partitions. You can
+choose |otherpartitioners|_ available in Flower Datasets. To tokenize the text, we will
+also load the tokenizer from the pre-trained Transformer model that we'll use during
+training - more on that in the next section. Each ``ClientApp`` will call this function
+to create dataloaders with the data that corresponds to its data partition.
+
+.. code-block:: python
+
+    partitioner = IidPartitioner(num_partitions=num_partitions)
+    fds = FederatedDataset(
+        dataset="stanfordnlp/imdb",
+        partitioners={"train": partitioner},
+    )
+    partition = fds.load_partition(partition_id)
+    # Divide data: 80% train, 20% test
+    partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+
+    def tokenize_function(examples):
+        return tokenizer(
+            examples["text"], truncation=True, add_special_tokens=True, max_length=512
+        )
+
+
+    partition_train_test = partition_train_test.map(tokenize_function, batched=True)
+    partition_train_test = partition_train_test.remove_columns("text")
+    partition_train_test = partition_train_test.rename_column("label", "labels")
+
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+    trainloader = DataLoader(
+        partition_train_test["train"],
+        shuffle=True,
+        batch_size=32,
+        collate_fn=data_collator,
+    )
+
+    testloader = DataLoader(
+        partition_train_test["test"], batch_size=32, collate_fn=data_collator
+    )
+
+The Model
+---------
+
+We will leverage 🤗 Hugging Face to federate the training of language models over
+multiple clients using Flower. More specifically, we will fine-tune a pre-trained
+Transformer model (|berttiny|_) for sequence classification over the dataset of IMDB
+ratings. The end goal is to detect if a movie rating is positive or negative. If you
+have access to larger GPUs, feel free to use larger models!
+
+.. 
code-block:: python
+
+    net = AutoModelForSequenceClassification.from_pretrained(
+        model_name, num_labels=num_labels
+    )
+
+Note that here, ``model_name`` is a string that will be loaded from the ``Context`` in
+the ClientApp and ServerApp.
+
+In addition to loading the pretrained model weights and architecture, we also include
+two utility functions to perform both training (i.e. ``train()``) and evaluation (i.e.
+``test()``) using the above model. These functions should look fairly familiar if you
+have some prior experience with PyTorch. Note that these functions do not have anything
+specific to Flower. That being said, the training function will normally be called, as
+we'll see later, from a Flower client passing its own data. In summary, your clients can
+use standard training/testing functions to perform local training or evaluation:
+
+.. code-block:: python
+
+    def train(net, trainloader, epochs, device):
+        optimizer = AdamW(net.parameters(), lr=5e-5)
+        net.train()
+        for _ in range(epochs):
+            for batch in trainloader:
+                batch = {k: v.to(device) for k, v in batch.items()}
+                outputs = net(**batch)
+                loss = outputs.loss
+                loss.backward()
+                optimizer.step()
+                optimizer.zero_grad()
+
+
+    def test(net, testloader, device):
+        metric = load_metric("accuracy")
+        loss = 0
+        net.eval()
+        for batch in testloader:
+            batch = {k: v.to(device) for k, v in batch.items()}
+            with torch.no_grad():
+                outputs = net(**batch)
+            logits = outputs.logits
+            loss += outputs.loss.item()
+            predictions = torch.argmax(logits, dim=-1)
+            metric.add_batch(predictions=predictions, references=batch["labels"])
+        loss /= len(testloader.dataset)
+        accuracy = metric.compute()["accuracy"]
+        return loss, accuracy
+
+The ClientApp
+-------------
+
+The main changes we have to make to use 🤗 Hugging Face with Flower will be found in the
+``get_weights()`` and ``set_weights()`` functions. Under the hood, the ``transformers``
+library uses PyTorch, which means we can reuse the ``get_weights()`` and
+``set_weights()`` code that we defined in the :doc:`Quickstart PyTorch
+` tutorial. As a reminder, in ``get_weights()``, PyTorch
+model parameters are extracted and represented as a list of NumPy arrays. The
+``set_weights()`` function is the opposite: given a list of NumPy arrays, it applies
+them to an existing PyTorch model. Doing this is fairly easy in PyTorch.

 .. note::

+    The specific implementation of ``get_weights()`` and ``set_weights()`` depends on
+    the type of models you use. The ones shown below work for a wide range of PyTorch
+    models but you might need to adjust them if you have more exotic model
+    architectures.
+
+.. code-block:: python
+
+    def get_weights(net):
+        return [val.cpu().numpy() for _, val in net.state_dict().items()]
+
+
+    def set_weights(net, parameters):
+        params_dict = zip(net.state_dict().keys(), parameters)
+        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
+        net.load_state_dict(state_dict, strict=True)
+
+The rest of the functionality is directly inspired by the centralized case. The
+``fit()`` method in the client trains the model using the local dataset. Similarly, the
+``evaluate()`` method is used to evaluate the model received on a held-out validation
+set that the client might have:
+
+.. 
code-block:: python
+
+    class FlowerClient(NumPyClient):
+        def __init__(self, net, trainloader, testloader, local_epochs):
+            self.net = net
+            self.trainloader = trainloader
+            self.testloader = testloader
+            self.local_epochs = local_epochs
+            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+            self.net.to(self.device)
+
+        def fit(self, parameters, config):
+            set_weights(self.net, parameters)
+            train(self.net, self.trainloader, epochs=self.local_epochs, device=self.device)
+            return get_weights(self.net), len(self.trainloader), {}
+
+        def evaluate(self, parameters, config):
+            set_weights(self.net, parameters)
+            loss, accuracy = test(self.net, self.testloader, self.device)
+            return float(loss), len(self.testloader), {"accuracy": accuracy}
+
+Finally, we can construct a ``ClientApp`` using the ``FlowerClient`` defined above by
+means of a ``client_fn()`` callback. Note that the ``context`` enables you to get access
+to hyperparameters defined in your ``pyproject.toml`` to configure the run. In this
+tutorial we access the ``local-epochs`` setting to control the number of epochs a
+``ClientApp`` will perform when running the ``fit()`` method. You could define
+additional hyperparameters in ``pyproject.toml`` and access them here.

-To construct a ``ServerApp`` we define a ``server_fn()`` callback with
-an identical signature to that of ``client_fn()`` but the return type is
-|serverappcomponents|_ as opposed to a |client|_ In this example we use
-the `FedAvg` strategy. To it we pass a randomly initialized model that
-will server as the global model to federated. Note that the value of
-``fraction_fit`` is read from the run config. You can find the default
-value defined in the ``pyproject.toml``.
+.. code-block:: python

-.. code:: python
+    def client_fn(context: Context):

-   def server_fn(context: Context):
-       # Read from config
-       num_rounds = context.run_config["num-server-rounds"]
-       fraction_fit = context.run_config["fraction-fit"]
+        # Get this client's dataset partition
+        partition_id = context.node_config["partition-id"]
+        num_partitions = context.node_config["num-partitions"]
+        model_name = context.run_config["model-name"]
+        trainloader, valloader = load_data(partition_id, num_partitions, model_name)

-       # Initialize global model
-       model_name = context.run_config["model-name"]
-       num_labels = context.run_config["num-labels"]
-       net = AutoModelForSequenceClassification.from_pretrained(
-           model_name, num_labels=num_labels
-       )
+        # Load model
+        num_labels = context.run_config["num-labels"]
+        net = AutoModelForSequenceClassification.from_pretrained(
+            model_name, num_labels=num_labels
+        )

-       weights = get_weights(net)
-       initial_parameters = ndarrays_to_parameters(weights)
+        local_epochs = context.run_config["local-epochs"]

-       # Define strategy
-       strategy = FedAvg(
-           fraction_fit=fraction_fit,
-           fraction_evaluate=1.0,
-           initial_parameters=initial_parameters,
-       )
-       config = ServerConfig(num_rounds=num_rounds)
+        # Return Client instance
+        return FlowerClient(net, trainloader, valloader, local_epochs).to_client()

-       return ServerAppComponents(strategy=strategy, config=config)
+    # Flower ClientApp
+    app = ClientApp(client_fn)

-   # Create ServerApp
-   app = ServerApp(server_fn=server_fn)
+The ServerApp
+-------------

-Congratulations! You've successfully built and run your first federated
-learning system for an LLM. 
+To construct a ``ServerApp`` we define a ``server_fn()`` callback with an identical
+signature to that of ``client_fn()`` but the return type is |serverappcomponents|_ as
+opposed to a |client|_. In this example we use the ``FedAvg`` strategy. To it we pass a
+randomly initialized model that will serve as the global model to be federated. Note
+that the value of ``fraction_fit`` is read from the run config. You can find the
+default value defined in the ``pyproject.toml``.

-.. note::
+.. code-block:: python

-   Check the source code of the extended version of this tutorial in
-   |quickstart_hf_link|_ in the Flower GitHub repository. For a
-   comprehensive example of a federated fine-tuning of an LLM with
-   Flower, refer to the |flowertune|_ example in the Flower GitHub
-   repository.
+    def server_fn(context: Context):
+        # Read from config
+        num_rounds = context.run_config["num-server-rounds"]
+        fraction_fit = context.run_config["fraction-fit"]

-.. |quickstart_hf_link| replace::
+        # Initialize global model
+        model_name = context.run_config["model-name"]
+        num_labels = context.run_config["num-labels"]
+        net = AutoModelForSequenceClassification.from_pretrained(
+            model_name, num_labels=num_labels
+        )

-   ``examples/quickstart-huggingface``
+        weights = get_weights(net)
+        initial_parameters = ndarrays_to_parameters(weights)

-.. |fedavg| replace::
+        # Define strategy
+        strategy = FedAvg(
+            fraction_fit=fraction_fit,
+            fraction_evaluate=1.0,
+            initial_parameters=initial_parameters,
+        )
+        config = ServerConfig(num_rounds=num_rounds)

-   ``FedAvg``
+        return ServerAppComponents(strategy=strategy, config=config)

-.. |iidpartitioner| replace::
-   ``IidPartitioner``

-.. |otherpartitioners| replace::
+    # Create ServerApp
+    app = ServerApp(server_fn=server_fn)

-   other partitioners
+Congratulations! You've successfully built and run your first federated learning system
+for an LLM.

-.. |berttiny| replace::
+.. note::

-   ``bert-tiny``
+    Check the source code of the extended version of this tutorial in
+    |quickstart_hf_link|_ in the Flower GitHub repository. For a comprehensive example
+    of a federated fine-tuning of an LLM with Flower, refer to the |flowertune|_ example
+    in the Flower GitHub repository.

-.. |serverappcomponents| replace::
+.. |quickstart_hf_link| replace:: ``examples/quickstart-huggingface``

-   ``ServerAppComponents``
+.. |fedavg| replace:: ``FedAvg``

-.. |client| replace::
+.. |iidpartitioner| replace:: ``IidPartitioner``

-   ``Client``
+.. |otherpartitioners| replace:: other partitioners

-.. |flowerdatasets| replace::
+.. |berttiny| replace:: ``bert-tiny``

-   Flower Datasets
+.. |serverappcomponents| replace:: ``ServerAppComponents``

-.. |flowertune| replace::
+.. |client| replace:: ``Client``

-   FlowerTune LLM
+.. |flowerdatasets| replace:: Flower Datasets

+.. |flowertune| replace:: FlowerTune LLM

 .. _berttiny: https://huggingface.co/prajjwal1/bert-tiny

@@ -434,4 +400,4 @@ learning system for an LLM.
 .. _serverappcomponents: ref-api/flwr.server.ServerAppComponents.html#serverappcomponents

 .. meta::
-   :description: Check out this Federating Learning quickstart tutorial for using Flower with 🤗 HuggingFace Transformers in order to fine-tune an LLM.
+   :description: Check out this Federated Learning quickstart tutorial for using Flower with 🤗 HuggingFace Transformers in order to fine-tune an LLM. 
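+
+The run output shown earlier contains warnings such as ``No fit_metrics_aggregation_fn
+provided``. As a possible extension of this tutorial (it is not part of the generated
+template), you could silence them by passing a metrics aggregation function to
+``FedAvg`` inside ``server_fn()``. The sketch below computes an example-weighted
+average of the ``accuracy`` values that each client returns from ``evaluate()``:
+
+.. code-block:: python
+
+    from typing import List, Tuple
+
+    from flwr.common import Metrics
+
+
+    def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
+        # Weigh each client's accuracy by the number of examples it evaluated on
+        accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics]
+        examples = [num_examples for num_examples, _ in metrics]
+        return {"accuracy": sum(accuracies) / sum(examples)}
+
+
+    # Inside server_fn(), pass it to the strategy:
+    strategy = FedAvg(
+        fraction_fit=fraction_fit,
+        fraction_evaluate=1.0,
+        initial_parameters=initial_parameters,
+        evaluate_metrics_aggregation_fn=weighted_average,
+    )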
diff --git a/doc/source/tutorial-quickstart-ios.rst b/doc/source/tutorial-quickstart-ios.rst
index e4315ce569fb..8a9250f8dfb0 100644
--- a/doc/source/tutorial-quickstart-ios.rst
+++ b/doc/source/tutorial-quickstart-ios.rst
@@ -1,136 +1,155 @@
 .. _quickstart-ios:

-  Quickstart iOS
 ==============

 .. meta::
-   :description: Read this Federated Learning quickstart tutorial for creating an iOS app using Flower to train a neural network on MNIST.
+    :description: Read this Federated Learning quickstart tutorial for creating an iOS app using Flower to train a neural network on MNIST.

-In this tutorial we will learn how to train a Neural Network on MNIST using Flower and CoreML on iOS devices.
+In this tutorial we will learn how to train a Neural Network on MNIST using Flower and
+CoreML on iOS devices.

-First of all, for running the Flower Python server, it is recommended to create a virtual environment and run everything within a :doc:`virtualenv `.
-For the Flower client implementation in iOS, it is recommended to use Xcode as our IDE.
+First of all, for running the Flower Python server, it is recommended to create a
+virtual environment and run everything within a :doc:`virtualenv
+`. For the Flower client implementation in iOS, it
+is recommended to use Xcode as our IDE.

-Our example consists of one Python *server* and two iPhone *clients* that all have the same model.
+Our example consists of one Python *server* and two iPhone *clients* that all have the
+same model.

-*Clients* are responsible for generating individual weight updates for the model based on their local datasets.
-These updates are then sent to the *server* which will aggregate them to produce a better model. Finally, the *server* sends this improved version of the model back to each *client*.
-A complete cycle of weight updates is called a *round*.
+*Clients* are responsible for generating individual weight updates for the model based
+on their local datasets. These updates are then sent to the *server* which will
+aggregate them to produce a better model. Finally, the *server* sends this improved
+version of the model back to each *client*. A complete cycle of weight updates is called
+a *round*.

-Now that we have a rough idea of what is going on, let's get started to setup our Flower server environment. We first need to install Flower. You can do this by using pip:
+Now that we have a rough idea of what is going on, let's get started by setting up our
+Flower server environment. We first need to install Flower. You can do this by using
+pip:

 .. code-block:: shell

-   $ pip install flwr
+    $ pip install flwr

 Or Poetry:

 .. code-block:: shell

-   $ poetry add flwr
+    $ poetry add flwr

 Flower Client
 -------------

-Now that we have all our dependencies installed, let's run a simple distributed training using CoreML as our local training pipeline and MNIST as our dataset.
-For simplicity reasons we will use the complete Flower client with CoreML, that has been implemented and stored inside the Swift SDK. The client implementation can be seen below:
+Now that we have all our dependencies installed, let's run a simple distributed training
+using CoreML as our local training pipeline and MNIST as our dataset. For simplicity, we
+will use the complete Flower client with CoreML that has been implemented and stored
+inside the Swift SDK. The client implementation can be seen below:

.. 
code-block:: swift - /// Parses the parameters from the local model and returns them as GetParametersRes struct - /// - /// - Returns: Parameters from the local model - public func getParameters() -> GetParametersRes { - let parameters = parameters.weightsToParameters() - let status = Status(code: .ok, message: String()) - - return GetParametersRes(parameters: parameters, status: status) - } - - /// Calls the routine to fit the local model - /// - /// - Returns: The result from the local training, e.g., updated parameters - public func fit(ins: FitIns) -> FitRes { - let status = Status(code: .ok, message: String()) - let result = runMLTask(configuration: parameters.parametersToWeights(parameters: ins.parameters), task: .train) - let parameters = parameters.weightsToParameters() - - return FitRes(parameters: parameters, numExamples: result.numSamples, status: status) - } + /// Parses the parameters from the local model and returns them as GetParametersRes struct + /// + /// - Returns: Parameters from the local model + public func getParameters() -> GetParametersRes { + let parameters = parameters.weightsToParameters() + let status = Status(code: .ok, message: String()) - /// Calls the routine to evaluate the local model - /// - /// - Returns: The result from the evaluation, e.g., loss - public func evaluate(ins: EvaluateIns) -> EvaluateRes { - let status = Status(code: .ok, message: String()) - let result = runMLTask(configuration: parameters.parametersToWeights(parameters: ins.parameters), task: .test) + return GetParametersRes(parameters: parameters, status: status) + } - return EvaluateRes(loss: Float(result.loss), numExamples: result.numSamples, status: status) - } + /// Calls the routine to fit the local model + /// + /// - Returns: The result from the local training, e.g., updated parameters + public func fit(ins: FitIns) -> FitRes { + let status = Status(code: .ok, message: String()) + let result = runMLTask(configuration: parameters.parametersToWeights(parameters: ins.parameters), task: .train) + let parameters = parameters.weightsToParameters() + + return FitRes(parameters: parameters, numExamples: result.numSamples, status: status) + } + + /// Calls the routine to evaluate the local model + /// + /// - Returns: The result from the evaluation, e.g., loss + public func evaluate(ins: EvaluateIns) -> EvaluateRes { + let status = Status(code: .ok, message: String()) + let result = runMLTask(configuration: parameters.parametersToWeights(parameters: ins.parameters), task: .test) + + return EvaluateRes(loss: Float(result.loss), numExamples: result.numSamples, status: status) + } -Let's create a new application project in Xcode and add :code:`flwr` as a dependency in your project. For our application, we will store the logic of our app in :code:`FLiOSModel.swift` and the UI elements in :code:`ContentView.swift`. -We will focus more on :code:`FLiOSModel.swift` in this quickstart. Please refer to the `full code example `_ to learn more about the app. +Let's create a new application project in Xcode and add ``flwr`` as a dependency in your +project. For our application, we will store the logic of our app in ``FLiOSModel.swift`` +and the UI elements in ``ContentView.swift``. We will focus more on ``FLiOSModel.swift`` +in this quickstart. Please refer to the `full code example +`_ to learn more about the app. -Import Flower and CoreML related packages in :code:`FLiOSModel.swift`: +Import Flower and CoreML related packages in ``FLiOSModel.swift``: .. 
code-block:: swift

-   import Foundation
-   import CoreML
-   import flwr
+    import Foundation
+    import CoreML
+    import flwr

-Then add the mlmodel to the project simply by drag-and-drop, the mlmodel will be bundled inside the application during deployment to your iOS device.
-We need to pass the url to access mlmodel and run CoreML machine learning processes, it can be retrieved by calling the function :code:`Bundle.main.url`.
-For the MNIST dataset, we need to preprocess it into :code:`MLBatchProvider` object. The preprocessing is done inside :code:`DataLoader.swift`.
+Then add the mlmodel to the project simply by drag-and-drop; the mlmodel will be bundled
+inside the application during deployment to your iOS device. We need to pass the URL to
+access the mlmodel and run CoreML machine learning processes; it can be retrieved by
+calling the function ``Bundle.main.url``. For the MNIST dataset, we need to preprocess
+it into an ``MLBatchProvider`` object. The preprocessing is done inside
+``DataLoader.swift``.

 .. code-block:: swift

-   // prepare train dataset
-   let trainBatchProvider = DataLoader.trainBatchProvider() { _ in }
-
-   // prepare test dataset
-   let testBatchProvider = DataLoader.testBatchProvider() { _ in }
-
-   // load them together
-   let dataLoader = MLDataLoader(trainBatchProvider: trainBatchProvider,
-                                 testBatchProvider: testBatchProvider)
+    // prepare train dataset
+    let trainBatchProvider = DataLoader.trainBatchProvider() { _ in }
+
+    // prepare test dataset
+    let testBatchProvider = DataLoader.testBatchProvider() { _ in }
+
+    // load them together
+    let dataLoader = MLDataLoader(trainBatchProvider: trainBatchProvider,
+                                  testBatchProvider: testBatchProvider)

-Since CoreML does not allow the model parameters to be seen before training, and accessing the model parameters during or after the training can only be done by specifying the layer name,
-we need to know this information beforehand, through looking at the model specification, which are written as proto files. The implementation can be seen in :code:`MLModelInspect`.
+Since CoreML does not allow the model parameters to be seen before training, and
+accessing the model parameters during or after the training can only be done by
+specifying the layer name, we need to know this information beforehand by looking at
+the model specification, which is written as proto files. The implementation can be
+seen in ``MLModelInspect``.

 After we have all of the necessary information, let's create our Flower client.

.. 
code-block:: swift

-   let compiledModelUrl = try MLModel.compileModel(at: url)
+    let compiledModelUrl = try MLModel.compileModel(at: url)

-   // inspect the model to be able to access the model parameters
-   // to access the model we need to know the layer name
-   // since the model parameters are stored as key value pairs
-   let modelInspect = try MLModelInspect(serializedData: Data(contentsOf: url))
-   let layerWrappers = modelInspect.getLayerWrappers()
-   self.mlFlwrClient = MLFlwrClient(layerWrappers: layerWrappers,
-                                    dataLoader: dataLoader,
-                                    compiledModelUrl: compiledModelUrl)
+    // inspect the model to be able to access the model parameters
+    // to access the model we need to know the layer name
+    // since the model parameters are stored as key value pairs
+    let modelInspect = try MLModelInspect(serializedData: Data(contentsOf: url))
+    let layerWrappers = modelInspect.getLayerWrappers()
+    self.mlFlwrClient = MLFlwrClient(layerWrappers: layerWrappers,
+                                     dataLoader: dataLoader,
+                                     compiledModelUrl: compiledModelUrl)

-Then start the Flower gRPC client and start communicating to the server by passing our Flower client to the function :code:`startFlwrGRPC`.
+Then start the Flower gRPC client and start communicating to the server by passing our
+Flower client to the function ``startFlwrGRPC``.

 .. code-block:: swift

-   self.flwrGRPC = FlwrGRPC(serverHost: hostname, serverPort: port)
-   self.flwrGRPC.startFlwrGRPC(client: self.mlFlwrClient)
+    self.flwrGRPC = FlwrGRPC(serverHost: hostname, serverPort: port)
+    self.flwrGRPC.startFlwrGRPC(client: self.mlFlwrClient)

-That's it for the client. We only have to implement :code:`Client` or call the provided
-:code:`MLFlwrClient` and call :code:`startFlwrGRPC()`. The attribute :code:`hostname` and :code:`port` tells the client which server to connect to.
-This can be done by entering the hostname and port in the application before clicking the start button to start the federated learning process.
+That's it for the client. We only have to implement ``Client`` or call the provided
+``MLFlwrClient`` and call ``startFlwrGRPC()``. The attributes ``hostname`` and ``port``
+tell the client which server to connect to. This can be done by entering the hostname
+and port in the application before clicking the start button to start the federated
+learning process.

 Flower Server
 -------------

-For simple workloads we can start a Flower server and leave all the
-configuration possibilities at their default values. In a file named
-:code:`server.py`, import Flower and start the server:
+For simple workloads we can start a Flower server and leave all the configuration
+possibilities at their default values. In a file named ``server.py``, import Flower and
+start the server:

 .. code-block:: python

@@ -141,18 +160,21 @@ configuration possibilities at their default values. In a file named
 Train the model, federated!
 ---------------------------

-With both client and server ready, we can now run everything and see federated
-learning in action. FL systems usually have a server and multiple clients. We
-therefore have to start the server first:
+With both client and server ready, we can now run everything and see federated learning
+in action. FL systems usually have a server and multiple clients. We therefore have to
+start the server first:

 .. code-block:: shell

    $ python server.py

-Once the server is running we can start the clients in different terminals.
-Build and run the client through your Xcode, one through Xcode Simulator and the other by deploying it to your iPhone. 
-To see more about how to deploy your app to iPhone or Simulator visit `here `_.
+Once the server is running we can start the clients in different terminals. Build and
+run the client through your Xcode, one through Xcode Simulator and the other by
+deploying it to your iPhone. To see more about how to deploy your app to iPhone or
+Simulator visit `here
+`_.

-Congratulations!
-You've successfully built and run your first federated learning system in your ios device.
-The full `source code `_ for this example can be found in :code:`examples/ios`.
+Congratulations! You've successfully built and run your first federated learning system
+on your iOS device. The full `source code
+`_ for this example can be found
+in ``examples/ios``.
diff --git a/doc/source/tutorial-quickstart-jax.rst b/doc/source/tutorial-quickstart-jax.rst
index d2b9243e2bb3..0581e95d8d42 100644
--- a/doc/source/tutorial-quickstart-jax.rst
+++ b/doc/source/tutorial-quickstart-jax.rst
@@ -1,34 +1,42 @@
 .. _quickstart-jax:

-  Quickstart JAX
 ==============

 .. meta::
-    :description: Check out this Federated Learning quickstart tutorial for using Flower with Jax to train a linear regression model on a scikit-learn dataset.
-
-This tutorial will show you how to use Flower to build a federated version of an existing JAX workload.
-We are using JAX to train a linear regression model on a scikit-learn dataset.
-We will structure the example similar to our `PyTorch - From Centralized To Federated `_ walkthrough.
-First, we build a centralized training approach based on the `Linear Regression with JAX `_ tutorial`.
-Then, we build upon the centralized training code to run the training in a federated fashion.
-
-Before we start building our JAX example, we need install the packages :code:`jax`, :code:`jaxlib`, :code:`scikit-learn`, and :code:`flwr`:
+    :description: Check out this Federated Learning quickstart tutorial for using Flower with Jax to train a linear regression model on a scikit-learn dataset.
+
+This tutorial will show you how to use Flower to build a federated version of an
+existing JAX workload. We are using JAX to train a linear regression model on a
+scikit-learn dataset. We will structure the example similarly to our `PyTorch - From
+Centralized To Federated
+`_
+walkthrough. First, we build a centralized training approach based on the `Linear
+Regression with JAX
+`_ tutorial.
+Then, we build upon the centralized training code to run the training in a federated
+fashion.
+
+Before we start building our JAX example, we need to install the packages ``jax``,
+``jaxlib``, ``scikit-learn``, and ``flwr``:

 .. code-block:: shell

-   $ pip install jax jaxlib scikit-learn flwr
-
+    $ pip install jax jaxlib scikit-learn flwr

 Linear Regression with JAX
 --------------------------

-We begin with a brief description of the centralized training code based on a :code:`Linear Regression` model.
-If you want a more in-depth explanation of what's going on then have a look at the official `JAX documentation `_.
+We begin with a brief description of the centralized training code based on a ``Linear
+Regression`` model. If you want a more in-depth explanation of what's going on then have
+a look at the official `JAX documentation `_.

-Let's create a new file called :code:`jax_training.py` with all the components required for a traditional (centralized) linear regression training.
-First, the JAX packages :code:`jax` and :code:`jaxlib` need to be imported. 
In addition, we need to import :code:`sklearn` since we use :code:`make_regression` for the dataset and :code:`train_test_split` to split the dataset into a training and test set. -You can see that we do not yet import the :code:`flwr` package for federated learning. This will be done later. +Let's create a new file called ``jax_training.py`` with all the components required for +a traditional (centralized) linear regression training. First, the JAX packages ``jax`` +and ``jaxlib`` need to be imported. In addition, we need to import ``sklearn`` since we +use ``make_regression`` for the dataset and ``train_test_split`` to split the dataset +into a training and test set. You can see that we do not yet import the ``flwr`` package +for federated learning. This will be done later. .. code-block:: python @@ -40,47 +48,52 @@ You can see that we do not yet import the :code:`flwr` package for federated lea key = jax.random.PRNGKey(0) -The :code:`load_data()` function loads the mentioned training and test sets. +The ``load_data()`` function loads the mentioned training and test sets. .. code-block:: python - def load_data() -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + def load_data() -> ( + Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]] + ): # create our dataset and start with similar datasets for different clients X, y = make_regression(n_features=3, random_state=0) X, X_test, y, y_test = train_test_split(X, y) return X, y, X_test, y_test -The model architecture (a very simple :code:`Linear Regression` model) is defined in :code:`load_model()`. +The model architecture (a very simple ``Linear Regression`` model) is defined in +``load_model()``. .. code-block:: python def load_model(model_shape) -> Dict: # model weights - params = { - 'b' : jax.random.uniform(key), - 'w' : jax.random.uniform(key, model_shape) - } + params = {"b": jax.random.uniform(key), "w": jax.random.uniform(key, model_shape)} return params -We now need to define the training (function :code:`train()`), which loops over the training set and measures the loss (function :code:`loss_fn()`) for each batch of training examples. The loss function is separate since JAX takes derivatives with a :code:`grad()` function (defined in the :code:`main()` function and called in :code:`train()`). +We now need to define the training (function ``train()``), which loops over the training +set and measures the loss (function ``loss_fn()``) for each batch of training examples. +The loss function is separate since JAX takes derivatives with a ``grad()`` function +(defined in the ``main()`` function and called in ``train()``). .. code-block:: python def loss_fn(params, X, y) -> Callable: - err = jnp.dot(X, params['w']) + params['b'] - y + err = jnp.dot(X, params["w"]) + params["b"] - y return jnp.mean(jnp.square(err)) # mse + def train(params, grad_fn, X, y) -> Tuple[np.array, float, int]: num_examples = X.shape[0] for epochs in range(10): grads = grad_fn(params, X, y) params = jax.tree_multimap(lambda p, g: p - 0.05 * g, params, grads) - loss = loss_fn(params,X, y) + loss = loss_fn(params, X, y) # if epochs % 10 == 9: # print(f'For Epoch {epochs} loss {loss}') return params, loss, num_examples -The evaluation of the model is defined in the function :code:`evaluation()`. The function takes all test examples and measures the loss of the linear regression model. +The evaluation of the model is defined in the function ``evaluation()``. 
The function +takes all test examples and measures the loss of the linear regression model. .. code-block:: python @@ -91,7 +104,9 @@ The evaluation of the model is defined in the function :code:`evaluation()`. The # print(f'Test loss {loss_test}') return loss_test, num_examples -Having defined the data loading, model architecture, training, and evaluation we can put everything together and train our model using JAX. As already mentioned, the :code:`jax.grad()` function is defined in :code:`main()` and passed to :code:`train()`. +Having defined the data loading, model architecture, training, and evaluation we can put +everything together and train our model using JAX. As already mentioned, the +``jax.grad()`` function is defined in ``main()`` and passed to ``train()``. .. code-block:: python @@ -100,7 +115,7 @@ Having defined the data loading, model architecture, training, and evaluation we model_shape = X.shape[1:] grad_fn = jax.grad(loss_fn) print("Model Shape", model_shape) - params = load_model(model_shape) + params = load_model(model_shape) params, loss, num_examples = train(params, grad_fn, X, y) evaluation(params, grad_fn, X_test, y_test) @@ -110,40 +125,48 @@ Having defined the data loading, model architecture, training, and evaluation we You can now run your (centralized) JAX linear regression workload: -.. code-block:: python +.. code-block:: bash python3 jax_training.py -So far this should all look fairly familiar if you've used JAX before. -Let's take the next step and use what we've built to create a simple federated learning system consisting of one server and two clients. +So far this should all look fairly familiar if you've used JAX before. Let's take the +next step and use what we've built to create a simple federated learning system +consisting of one server and two clients. JAX meets Flower ---------------- -The concept of federating an existing workload is always the same and easy to understand. -We have to start a *server* and then use the code in :code:`jax_training.py` for the *clients* that are connected to the *server*. -The *server* sends model parameters to the clients. The *clients* run the training and update the parameters. -The updated parameters are sent back to the *server*, which averages all received parameter updates. -This describes one round of the federated learning process, and we repeat this for multiple rounds. +The concept of federating an existing workload is always the same and easy to +understand. We have to start a *server* and then use the code in ``jax_training.py`` for +the *clients* that are connected to the *server*. The *server* sends model parameters to +the clients. The *clients* run the training and update the parameters. The updated +parameters are sent back to the *server*, which averages all received parameter updates. +This describes one round of the federated learning process, and we repeat this for +multiple rounds. -Our example consists of one *server* and two *clients*. Let's set up :code:`server.py` first. The *server* needs to import the Flower package :code:`flwr`. -Next, we use the :code:`start_server` function to start a server and tell it to perform three rounds of federated learning. +Our example consists of one *server* and two *clients*. Let's set up ``server.py`` +first. The *server* needs to import the Flower package ``flwr``. Next, we use the +``start_server`` function to start a server and tell it to perform three rounds of +federated learning. .. 
code-block:: python import flwr as fl if __name__ == "__main__": - fl.server.start_server(server_address="0.0.0.0:8080", config=fl.server.ServerConfig(num_rounds=3)) + fl.server.start_server( + server_address="0.0.0.0:8080", config=fl.server.ServerConfig(num_rounds=3) + ) We can already start the *server*: -.. code-block:: python +.. code-block:: bash python3 server.py -Finally, we will define our *client* logic in :code:`client.py` and build upon the previously defined JAX training in :code:`jax_training.py`. -Our *client* needs to import :code:`flwr`, but also :code:`jax` and :code:`jaxlib` to update the parameters on our JAX model: +Finally, we will define our *client* logic in ``client.py`` and build upon the +previously defined JAX training in ``jax_training.py``. Our *client* needs to import +``flwr``, but also ``jax`` and ``jaxlib`` to update the parameters on our JAX model: .. code-block:: python @@ -156,36 +179,45 @@ Our *client* needs to import :code:`flwr`, but also :code:`jax` and :code:`jaxli import jax_training - -Implementing a Flower *client* basically means implementing a subclass of either :code:`flwr.client.Client` or :code:`flwr.client.NumPyClient`. -Our implementation will be based on :code:`flwr.client.NumPyClient` and we'll call it :code:`FlowerClient`. -:code:`NumPyClient` is slightly easier to implement than :code:`Client` if you use a framework with good NumPy interoperability (like JAX) because it avoids some of the boilerplate that would otherwise be necessary. -:code:`FlowerClient` needs to implement four methods, two methods for getting/setting model parameters, one method for training the model, and one method for testing the model: - -#. :code:`set_parameters (optional)` - * set the model parameters on the local model that are received from the server - * transform parameters to NumPy :code:`ndarray`'s - * loop over the list of model parameters received as NumPy :code:`ndarray`'s (think list of neural network layers) -#. :code:`get_parameters` - * get the model parameters and return them as a list of NumPy :code:`ndarray`'s (which is what :code:`flwr.client.NumPyClient` expects) -#. :code:`fit` - * update the parameters of the local model with the parameters received from the server - * train the model on the local training set - * get the updated local model parameters and return them to the server -#. :code:`evaluate` - * update the parameters of the local model with the parameters received from the server - * evaluate the updated model on the local test set - * return the local loss to the server - -The challenging part is to transform the JAX model parameters from :code:`DeviceArray` to :code:`NumPy ndarray` to make them compatible with `NumPyClient`. - -The two :code:`NumPyClient` methods :code:`fit` and :code:`evaluate` make use of the functions :code:`train()` and :code:`evaluate()` previously defined in :code:`jax_training.py`. -So what we really do here is we tell Flower through our :code:`NumPyClient` subclass which of our already defined functions to call for training and evaluation. -We included type annotations to give you a better understanding of the data types that get passed around. +Implementing a Flower *client* basically means implementing a subclass of either +``flwr.client.Client`` or ``flwr.client.NumPyClient``. Our implementation will be based +on ``flwr.client.NumPyClient`` and we'll call it ``FlowerClient``. 
``NumPyClient`` is +slightly easier to implement than ``Client`` if you use a framework with good NumPy +interoperability (like JAX) because it avoids some of the boilerplate that would +otherwise be necessary. ``FlowerClient`` needs to implement four methods, two methods +for getting/setting model parameters, one method for training the model, and one method +for testing the model: + +1. ``set_parameters (optional)`` + - set the model parameters on the local model that are received from the server + - transform parameters to NumPy ``ndarray``'s + - loop over the list of model parameters received as NumPy ``ndarray``'s (think + list of neural network layers) +2. ``get_parameters`` + - get the model parameters and return them as a list of NumPy ``ndarray``'s + (which is what ``flwr.client.NumPyClient`` expects) +3. ``fit`` + - update the parameters of the local model with the parameters received from the + server + - train the model on the local training set + - get the updated local model parameters and return them to the server +4. ``evaluate`` + - update the parameters of the local model with the parameters received from the + server + - evaluate the updated model on the local test set + - return the local loss to the server + +The challenging part is to transform the JAX model parameters from ``DeviceArray`` to +``NumPy ndarray`` to make them compatible with `NumPyClient`. + +The two ``NumPyClient`` methods ``fit`` and ``evaluate`` make use of the functions +``train()`` and ``evaluate()`` previously defined in ``jax_training.py``. So what we +really do here is we tell Flower through our ``NumPyClient`` subclass which of our +already defined functions to call for training and evaluation. We included type +annotations to give you a better understanding of the data types that get passed around. .. 
code-block:: python - class FlowerClient(fl.client.NumPyClient): """Flower client implementing using linear regression and JAX.""" @@ -198,7 +230,7 @@ We included type annotations to give you a better understanding of the data type test_x: List[np.ndarray], test_y: List[np.ndarray], ) -> None: - self.params= params + self.params = params self.grad_fn = grad_fn self.train_x = train_x self.train_y = train_y @@ -211,25 +243,26 @@ We included type annotations to give you a better understanding of the data type for _, val in self.params.items(): parameter_value.append(np.array(val)) return parameter_value - + def set_parameters(self, parameters: List[np.ndarray]) -> Dict: # Collect model parameters and update the parameters of the local model - value=jnp.ndarray - params_item = list(zip(self.params.keys(),parameters)) + value = jnp.ndarray + params_item = list(zip(self.params.keys(), parameters)) for item in params_item: key = item[0] value = item[1] self.params[key] = value return self.params - def fit( self, parameters: List[np.ndarray], config: Dict ) -> Tuple[List[np.ndarray], int, Dict]: # Set model parameters, train model, return updated model parameters print("Start local training") self.params = self.set_parameters(parameters) - self.params, loss, num_examples = jax_training.train(self.params, self.grad_fn, self.train_x, self.train_y) + self.params, loss, num_examples = jax_training.train( + self.params, self.grad_fn, self.train_x, self.train_y + ) results = {"loss": float(loss)} print("Training results", results) return self.get_parameters(config={}), num_examples, results @@ -240,7 +273,9 @@ We included type annotations to give you a better understanding of the data type # Set model parameters, evaluate the model on a local test dataset, return result print("Start evaluation") self.params = self.set_parameters(parameters) - loss, num_examples = jax_training.evaluation(self.params,self.grad_fn, self.test_x, self.test_y) + loss, num_examples = jax_training.evaluation( + self.params, self.grad_fn, self.test_x, self.test_y + ) print("Evaluation accuracy & loss", loss) return ( float(loss), @@ -267,22 +302,25 @@ Having defined the federation process, we can run it. client = FlowerClient(params, grad_fn, train_x, train_y, test_x, test_y) fl.client.start_client(server_address="0.0.0.0:8080", client=client.to_client()) + if __name__ == "__main__": main() - And that's it. You can now open two additional terminal windows and run -.. code-block:: python +.. code-block:: bash python3 client.py -in each window (make sure that the server is still running before you do so) and see your JAX project run federated learning across two clients. Congratulations! +in each window (make sure that the server is still running before you do so) and see +your JAX project run federated learning across two clients. Congratulations! Next Steps ---------- -The source code of this example was improved over time and can be found here: `Quickstart JAX `_. +The source code of this example was improved over time and can be found here: +`Quickstart JAX `_. Our example is somewhat over-simplified because both clients load the same dataset. -You're now prepared to explore this topic further. How about using a more sophisticated model or using a different dataset? How about adding more clients? +You're now prepared to explore this topic further. How about using a more sophisticated +model or using a different dataset? How about adding more clients? 
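+If you want each client to train on its own slice of the data, one option is to give
+every client a different partition of the dataset. The following is a minimal sketch,
+assuming a ``load_data()`` helper like the one used in ``jax_training.py`` and two
+hypothetical command-line flags (``--partition-id`` and ``--num-partitions``) that are
+not part of the example above:
+
+.. code-block:: python
+
+    import argparse
+
+    import numpy as np
+
+    import jax_training
+
+    # Hypothetical flags, not part of the example above
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--partition-id", type=int, default=0)
+    parser.add_argument("--num-partitions", type=int, default=2)
+    args = parser.parse_args()
+
+    # Load the full dataset, then keep only this client's slice of it
+    train_x, train_y, test_x, test_y = jax_training.load_data()
+    train_x = np.array_split(train_x, args.num_partitions)[args.partition_id]
+    train_y = np.array_split(train_y, args.num_partitions)[args.partition_id]
+
+Starting each ``client.py`` with a different ``--partition-id`` would then simulate a
+setting where every client holds its own local data.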
diff --git a/doc/source/tutorial-quickstart-mlx.rst b/doc/source/tutorial-quickstart-mlx.rst
index 675a08502d26..40e870ddc822 100644
--- a/doc/source/tutorial-quickstart-mlx.rst
+++ b/doc/source/tutorial-quickstart-mlx.rst
@@ -1,410 +1,393 @@
 .. _quickstart-mlx:

-################
- Quickstart MLX
-################
-
-In this federated learning tutorial we will learn how to train simple
-MLP on MNIST using Flower and MLX. It is recommended to create a virtual
-environment and run everything within a :doc:`virtualenv
-`.
-
-Let's use `flwr new` to create a complete Flower+MLX project. It will
-generate all the files needed to run, by default with the Simulation
-Engine, a federation of 10 nodes using `FedAvg
+Quickstart MLX
+==============
+
+In this federated learning tutorial we will learn how to train a simple MLP on MNIST
+using Flower and MLX. It is recommended to create a virtual environment and run
+everything within a :doc:`virtualenv `.
+
+Let's use `flwr new` to create a complete Flower+MLX project. It will generate all the
+files needed to run, by default with the Simulation Engine, a federation of 10 nodes
+using `FedAvg
 `_. The dataset will be partitioned using Flower Dataset's
 `IidPartitioner
 `_.

-Now that we have a rough idea of what this example is about, let's get
-started. First, install Flower in your new environment:
+Now that we have a rough idea of what this example is about, let's get started. First,
+install Flower in your new environment:

-.. code:: shell
+.. code-block:: shell

-   # In a new Python environment
-   $ pip install flwr
+    # In a new Python environment
+    $ pip install flwr

-Then, run the command below. You will be prompted to select of the
-available templates (choose ``MLX``), give a name to your project, and
-type in your developer name:
+Then, run the command below. You will be prompted to select one of the available
+templates (choose ``MLX``), give a name to your project, and type in your developer
+name:

-.. code:: shell
+.. code-block:: shell

-   $ flwr new
+    $ flwr new

-After running it you'll notice a new directory with your project name
-has been created. It should have the following structure:
+After running it you'll notice a new directory with your project name has been created.
+It should have the following structure:

-.. code:: shell
+.. code-block:: shell

-   
-   ├── 
-   │   ├── __init__.py
-   │   ├── client_app.py   # Defines your ClientApp
-   │   ├── server_app.py   # Defines your ServerApp
-   │   └── task.py         # Defines your model, training and data loading
-   ├── pyproject.toml      # Project metadata like dependencies and configs
-   └── README.md
+    
+    ├── 
+    │   ├── __init__.py
+    │   ├── client_app.py   # Defines your ClientApp
+    │   ├── server_app.py   # Defines your ServerApp
+    │   └── task.py         # Defines your model, training and data loading
+    ├── pyproject.toml      # Project metadata like dependencies and configs
+    └── README.md

-If you haven't yet installed the project and its dependencies, you can
-do so by:
+If you haven't yet installed the project and its dependencies, you can do so by:

-.. code:: shell
+.. code-block:: shell

-   # From the directory where your pyproject.toml is
-   $ pip install -e .
+    # From the directory where your pyproject.toml is
+    $ pip install -e .

 To run the project do:

-.. code:: shell
+.. code-block:: shell

-   # Run with default arguments
-   $ flwr run .
+    # Run with default arguments
+    $ flwr run .

 With default arguments you will see an output like this one:

-.. code:: shell
-
-   Loading project configuration... 
- Success - INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout - INFO : - INFO : [INIT] - INFO : Requesting initial parameters from one random client - WARNING : FAB ID is not provided; the default ClientApp will be loaded. - INFO : Received initial parameters from one random client - INFO : Evaluating initial global parameters - INFO : - INFO : [ROUND 1] - INFO : configure_fit: strategy sampled 10 clients (out of 10) - INFO : aggregate_fit: received 10 results and 0 failures - WARNING : No fit_metrics_aggregation_fn provided - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - WARNING : No evaluate_metrics_aggregation_fn provided - INFO : - INFO : [ROUND 2] - INFO : configure_fit: strategy sampled 10 clients (out of 10) - INFO : aggregate_fit: received 10 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [ROUND 3] - INFO : configure_fit: strategy sampled 10 clients (out of 10) - INFO : aggregate_fit: received 10 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [SUMMARY] - INFO : Run finished 3 round(s) in 8.15s - INFO : History (loss, distributed): - INFO : round 1: 2.243802046775818 - INFO : round 2: 2.101812958717346 - INFO : round 3: 1.7419301986694335 - INFO : - -You can also override the parameters defined in -``[tool.flwr.app.config]`` section in the ``pyproject.toml`` like this: - -.. code:: shell - - # Override some arguments - $ flwr run . --run-config "num-server-rounds=5 lr=0.05" - -What follows is an explanation of each component in the project you just -created: dataset partition, the model, defining the ``ClientApp`` and -defining the ``ServerApp``. - -********** - The Data -********** - -We will use `Flower Datasets `_ to -easily download and partition the `MNIST` dataset. In this example -you'll make use of the `IidPartitioner +.. code-block:: shell + + Loading project configuration... + Success + INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout + INFO : + INFO : [INIT] + INFO : Requesting initial parameters from one random client + WARNING : FAB ID is not provided; the default ClientApp will be loaded. 
+    INFO :      Received initial parameters from one random client
+    INFO :      Evaluating initial global parameters
+    INFO :
+    INFO :      [ROUND 1]
+    INFO :      configure_fit: strategy sampled 10 clients (out of 10)
+    INFO :      aggregate_fit: received 10 results and 0 failures
+    WARNING :   No fit_metrics_aggregation_fn provided
+    INFO :      configure_evaluate: strategy sampled 10 clients (out of 10)
+    INFO :      aggregate_evaluate: received 10 results and 0 failures
+    WARNING :   No evaluate_metrics_aggregation_fn provided
+    INFO :
+    INFO :      [ROUND 2]
+    INFO :      configure_fit: strategy sampled 10 clients (out of 10)
+    INFO :      aggregate_fit: received 10 results and 0 failures
+    INFO :      configure_evaluate: strategy sampled 10 clients (out of 10)
+    INFO :      aggregate_evaluate: received 10 results and 0 failures
+    INFO :
+    INFO :      [ROUND 3]
+    INFO :      configure_fit: strategy sampled 10 clients (out of 10)
+    INFO :      aggregate_fit: received 10 results and 0 failures
+    INFO :      configure_evaluate: strategy sampled 10 clients (out of 10)
+    INFO :      aggregate_evaluate: received 10 results and 0 failures
+    INFO :
+    INFO :      [SUMMARY]
+    INFO :      Run finished 3 round(s) in 8.15s
+    INFO :      History (loss, distributed):
+    INFO :              round 1: 2.243802046775818
+    INFO :              round 2: 2.101812958717346
+    INFO :              round 3: 1.7419301986694335
+    INFO :
+
+You can also override the parameters defined in the ``[tool.flwr.app.config]`` section
+in the ``pyproject.toml`` like this:
+
+.. code-block:: shell
+
+    # Override some arguments
+    $ flwr run . --run-config "num-server-rounds=5 lr=0.05"
+
+What follows is an explanation of each component in the project you just created:
+dataset partition, the model, defining the ``ClientApp`` and defining the ``ServerApp``.
+
+The Data
+--------
+
+We will use `Flower Datasets `_ to easily download and
+partition the `MNIST` dataset. In this example you'll make use of the `IidPartitioner
 `_
-to generate `num_partitions` partitions. You can choose `other
-partitioners
-`_
-available in Flower Datasets:
+to generate `num_partitions` partitions. You can choose `other partitioners
+`_ available in
+Flower Datasets:
+
+.. 
code-block:: python

+    partitioner = IidPartitioner(num_partitions=num_partitions)
+    fds = FederatedDataset(
+        dataset="ylecun/mnist",
+        partitioners={"train": partitioner},
+    )
+    partition = fds.load_partition(partition_id)
+    partition_splits = partition.train_test_split(test_size=0.2, seed=42)
+
+    partition_splits["train"].set_format("numpy")
+    partition_splits["test"].set_format("numpy")
+
+    train_partition = partition_splits["train"].map(
+        lambda img: {"img": img.reshape(-1, 28 * 28).squeeze().astype(np.float32) / 255.0},
+        input_columns="image",
+    )
+    test_partition = partition_splits["test"].map(
+        lambda img: {"img": img.reshape(-1, 28 * 28).squeeze().astype(np.float32) / 255.0},
+        input_columns="image",
+    )
+
+    data = (
+        train_partition["img"],
+        train_partition["label"].astype(np.uint32),
+        test_partition["img"],
+        test_partition["label"].astype(np.uint32),
+    )
+
+    train_images, train_labels, test_images, test_labels = map(mx.array, data)
+
+The Model
+---------

 We define the model as in the `centralized MLX example
-`_, it's a
-simple MLP:
+`_, a simple MLP:

-.. code:: python
+.. code-block:: python

-   class MLP(nn.Module):
-       """A simple MLP."""
+    class MLP(nn.Module):
+        """A simple MLP."""

-       def __init__(
-           self, num_layers: int, input_dim: int, hidden_dim: int, output_dim: int
-       ):
-           super().__init__()
-           layer_sizes = [input_dim] + [hidden_dim] * num_layers + [output_dim]
-           self.layers = [
-               nn.Linear(idim, odim)
-               for idim, odim in zip(layer_sizes[:-1], layer_sizes[1:])
-           ]
+        def __init__(
+            self, num_layers: int, input_dim: int, hidden_dim: int, output_dim: int
+        ):
+            super().__init__()
+            layer_sizes = [input_dim] + [hidden_dim] * num_layers + [output_dim]
+            self.layers = [
+                nn.Linear(idim, odim)
+                for idim, odim in zip(layer_sizes[:-1], layer_sizes[1:])
+            ]

-       def __call__(self, x):
-           for l in self.layers[:-1]:
-               x = mx.maximum(l(x), 0.0)
-           return self.layers[-1](x)
+        def __call__(self, x):
+            for l in self.layers[:-1]:
+                x = mx.maximum(l(x), 0.0)
+            return self.layers[-1](x)

-We also define some utility functions to test our model and to iterate
-over batches.
+We also define some utility functions to test our model and to iterate over batches.

-.. code:: python
+.. code-block:: python

-   def loss_fn(model, X, y):
-       return mx.mean(nn.losses.cross_entropy(model(X), y))
+    def loss_fn(model, X, y):
+        return mx.mean(nn.losses.cross_entropy(model(X), y))

-   def eval_fn(model, X, y):
-       return mx.mean(mx.argmax(model(X), axis=1) == y)
+    def eval_fn(model, X, y):
+        return mx.mean(mx.argmax(model(X), axis=1) == y)

-   def batch_iterate(batch_size, X, y):
-       perm = mx.array(np.random.permutation(y.size))
-       for s in range(0, y.size, batch_size):
-           ids = perm[s : s + batch_size]
-           yield X[ids], y[ids]
+    def batch_iterate(batch_size, X, y):
+        perm = mx.array(np.random.permutation(y.size))
+        for s in range(0, y.size, batch_size):
+            ids = perm[s : s + batch_size]
+            yield X[ids], y[ids]

 The ClientApp
-=============
+-------------

-The main changes we have to make to use `MLX` with `Flower` will be
-found in the ``get_params()`` and ``set_params()`` functions. Indeed,
-MLX doesn't provide an easy way to convert the model parameters into a
-list of ``np.array`` objects (the format we need for the serialization
-of the messages to work).
+The main changes we have to make to use `MLX` with `Flower` will be found in the
+``get_params()`` and ``set_params()`` functions. 
Indeed, MLX doesn't provide an easy way
+to convert the model parameters into a list of ``np.array`` objects (the format we need
+for the serialization of the messages to work).

 The way MLX stores its parameters is as follows:

-.. code:: shell
-
-   {
-      "layers": [
-         {"weight": mlx.core.array, "bias": mlx.core.array},
-         {"weight": mlx.core.array, "bias": mlx.core.array},
-         ...,
-         {"weight": mlx.core.array, "bias": mlx.core.array}
-      ]
-   }
-
-Therefore, to get our list of ``np.array`` objects, we need to extract
-each array and convert them into a NumPy array:
-
-.. code:: python
-
-   def get_params(model):
-       layers = model.parameters()["layers"]
-       return [np.array(val) for layer in layers for _, val in layer.items()]
-
-For the ``set_params()`` function, we perform the reverse operation. We
-receive a list of NumPy arrays and want to convert them into MLX
-parameters. Therefore, we iterate through pairs of parameters and assign
-them to the `weight` and `bias` keys of each layer dict:
-
-.. code:: python
-
-   def set_params(model, parameters):
-       new_params = {}
-       new_params["layers"] = [
-           {"weight": mx.array(parameters[i]), "bias": mx.array(parameters[i + 1])}
-           for i in range(0, len(parameters), 2)
-       ]
-       model.update(new_params)
-
-The rest of the functionality is directly inspired by the centralized
-case. The ``fit()`` method in the client trains the model using the
-local dataset:
+.. code-block:: shell
+
+    {
+        "layers": [
+            {"weight": mlx.core.array, "bias": mlx.core.array},
+            {"weight": mlx.core.array, "bias": mlx.core.array},
+            ...,
+            {"weight": mlx.core.array, "bias": mlx.core.array}
+        ]
+    }
+
+Therefore, to get our list of ``np.array`` objects, we need to extract each array and
+convert it into a NumPy array:
+
+.. code-block:: python
+
+    def get_params(model):
+        layers = model.parameters()["layers"]
+        return [np.array(val) for layer in layers for _, val in layer.items()]
+
+For the ``set_params()`` function, we perform the reverse operation. We receive a list
+of NumPy arrays and want to convert them into MLX parameters. Therefore, we iterate
+through pairs of parameters and assign them to the `weight` and `bias` keys of each
+layer dict:
+
+.. code-block:: python
+
+    def set_params(model, parameters):
+        new_params = {}
+        new_params["layers"] = [
+            {"weight": mx.array(parameters[i]), "bias": mx.array(parameters[i + 1])}
+            for i in range(0, len(parameters), 2)
+        ]
+        model.update(new_params)
+
+The rest of the functionality is directly inspired by the centralized case. The
+``fit()`` method in the client trains the model using the local dataset:
+
+.. 
code-block:: python + + def fit(self, parameters, config): + self.set_parameters(parameters) + for _ in range(self.num_epochs): + for X, y in batch_iterate( + self.batch_size, self.train_images, self.train_labels + ): + _, grads = self.loss_and_grad_fn(self.model, X, y) + self.optimizer.update(self.model, grads) + mx.eval(self.model.parameters(), self.optimizer.state) + return self.get_parameters(config={}), len(self.train_images), {} + +Here, after updating the parameters, we perform the training as in the centralized case, +and return the new parameters. And for the ``evaluate()`` method of the client: -.. code:: python +.. code-block:: python - def evaluate(self, parameters, config): - self.set_parameters(parameters) - accuracy = eval_fn(self.model, self.test_images, self.test_labels) - loss = loss_fn(self.model, self.test_images, self.test_labels) - return loss.item(), len(self.test_images), {"accuracy": accuracy.item()} + def evaluate(self, parameters, config): + self.set_parameters(parameters) + accuracy = eval_fn(self.model, self.test_images, self.test_labels) + loss = loss_fn(self.model, self.test_images, self.test_labels) + return loss.item(), len(self.test_images), {"accuracy": accuracy.item()} -We also begin by updating the parameters with the ones sent by the -server, and then we compute the loss and accuracy using the functions -defined above. In the constructor of the ``FlowerClient`` we instantiate -the `MLP` model as well as other components such as the optimizer. +We also begin by updating the parameters with the ones sent by the server, and then we +compute the loss and accuracy using the functions defined above. In the constructor of +the ``FlowerClient`` we instantiate the `MLP` model as well as other components such as +the optimizer. Putting everything together we have: -.. code:: python - - class FlowerClient(NumPyClient): - def __init__( - self, - data, - num_layers, - hidden_dim, - num_classes, - batch_size, - learning_rate, - num_epochs, - ): - self.num_layers = num_layers - self.hidden_dim = hidden_dim - self.num_classes = num_classes - self.batch_size = batch_size - self.learning_rate = learning_rate - self.num_epochs = num_epochs - - self.train_images, self.train_labels, self.test_images, self.test_labels = data - self.model = MLP( - num_layers, self.train_images.shape[-1], hidden_dim, num_classes - ) - self.optimizer = optim.SGD(learning_rate=learning_rate) - self.loss_and_grad_fn = nn.value_and_grad(self.model, loss_fn) - self.num_epochs = num_epochs - self.batch_size = batch_size - - def get_parameters(self, config): - return get_params(self.model) - - def set_parameters(self, parameters): - set_params(self.model, parameters) - - def fit(self, parameters, config): - self.set_parameters(parameters) - for _ in range(self.num_epochs): - for X, y in batch_iterate( - self.batch_size, self.train_images, self.train_labels - ): - _, grads = self.loss_and_grad_fn(self.model, X, y) - self.optimizer.update(self.model, grads) - mx.eval(self.model.parameters(), self.optimizer.state) - return self.get_parameters(config={}), len(self.train_images), {} - - def evaluate(self, parameters, config): - self.set_parameters(parameters) - accuracy = eval_fn(self.model, self.test_images, self.test_labels) - loss = loss_fn(self.model, self.test_images, self.test_labels) - return loss.item(), len(self.test_images), {"accuracy": accuracy.item()} - -Finally, we can construct a ``ClientApp`` using the ``FlowerClient`` -defined above by means of a ``client_fn()`` callback. 
Note that
-``context`` enables you to get access to hyperparemeters defined in
-``pyproject.toml`` to configure the run. In this tutorial we access,
-among other hyperparameters, the ``local-epochs`` setting to control the
-number of epochs a ``ClientApp`` will perform when running the ``fit()``
-method.
-
-.. code:: python
-
-   def client_fn(context: Context):
-       partition_id = context.node_config["partition-id"]
-       num_partitions = context.node_config["num-partitions"]
-       data = load_data(partition_id, num_partitions)
-
-       num_layers = context.run_config["num-layers"]
-       hidden_dim = context.run_config["hidden-dim"]
-       num_classes = 10
-       batch_size = context.run_config["batch-size"]
-       learning_rate = context.run_config["lr"]
-       num_epochs = context.run_config["local-epochs"]
-
-       # Return Client instance
-       return FlowerClient(
-           data, num_layers, hidden_dim, num_classes, batch_size, learning_rate, num_epochs
-       ).to_client()
-
-
-   # Flower ClientApp
-   app = ClientApp(client_fn)
+Finally, we can construct a ``ClientApp`` using the ``FlowerClient`` defined above by
+means of a ``client_fn()`` callback. Note that ``context`` enables you to get access to
+hyperparameters defined in ``pyproject.toml`` to configure the run. In this tutorial we
+access, among other hyperparameters, the ``local-epochs`` setting to control the number
+of epochs a ``ClientApp`` will perform when running the ``fit()`` method.
+
+.. 
code-block:: python
+
+    def client_fn(context: Context):
+        partition_id = context.node_config["partition-id"]
+        num_partitions = context.node_config["num-partitions"]
+        data = load_data(partition_id, num_partitions)
+
+        num_layers = context.run_config["num-layers"]
+        hidden_dim = context.run_config["hidden-dim"]
+        num_classes = 10
+        batch_size = context.run_config["batch-size"]
+        learning_rate = context.run_config["lr"]
+        num_epochs = context.run_config["local-epochs"]
+
+        # Return Client instance
+        return FlowerClient(
+            data, num_layers, hidden_dim, num_classes, batch_size, learning_rate, num_epochs
+        ).to_client()
+
+
+    # Flower ClientApp
+    app = ClientApp(client_fn)

 The ServerApp
--------------
+-------------

-To construct a ``ServerApp``, we define a ``server_fn()`` callback with
-an identical signature to that of ``client_fn()``, but the return type
-is `ServerAppComponents
+To construct a ``ServerApp``, we define a ``server_fn()`` callback with an identical
+signature to that of ``client_fn()``, but the return type is `ServerAppComponents
 `_ as opposed to `Client
-`_.
-In this example we use the ``FedAvg`` strategy.
+`_. In this
+example we use the ``FedAvg`` strategy.

-.. code:: python
+.. code-block:: python

-   def server_fn(context: Context):
-       # Read from config
-       num_rounds = context.run_config["num-server-rounds"]
+    def server_fn(context: Context):
+        # Read from config
+        num_rounds = context.run_config["num-server-rounds"]

-       # Define strategy
-       strategy = FedAvg()
-       config = ServerConfig(num_rounds=num_rounds)
+        # Define strategy
+        strategy = FedAvg()
+        config = ServerConfig(num_rounds=num_rounds)

-       return ServerAppComponents(strategy=strategy, config=config)
+        return ServerAppComponents(strategy=strategy, config=config)

-   # Create ServerApp
-   app = ServerApp(server_fn=server_fn)
+    # Create ServerApp
+    app = ServerApp(server_fn=server_fn)

-Congratulations! You've successfully built and run your first federated
-learning system.
+Congratulations! You've successfully built and run your first federated learning system.

 .. note::

-   Check the `source code
-   `_
-   of the extended version of this tutorial in
-   ``examples/quickstart-mlx`` in the Flower GitHub repository.
+    Check the `source code
+    `_ of the extended
+    version of this tutorial in ``examples/quickstart-mlx`` in the Flower GitHub
+    repository.
diff --git a/doc/source/tutorial-quickstart-pandas.rst b/doc/source/tutorial-quickstart-pandas.rst
index bb9cb1b28b54..00d831a15736 100644
--- a/doc/source/tutorial-quickstart-pandas.rst
+++ b/doc/source/tutorial-quickstart-pandas.rst
@@ -1,12 +1,12 @@
 .. _quickstart-pandas:
-
 Quickstart Pandas
 =================

 .. meta::
-   :description: Check out this Federated Learning quickstart tutorial for using Flower with Pandas to perform Federated Analytics.
+    :description: Check out this Federated Learning quickstart tutorial for using Flower with Pandas to perform Federated Analytics.

 Let's build a federated analytics system using Pandas and Flower!

-Please refer to the `full code example `_ to learn more.
+Please refer to the `full code example
+`_ to learn more.
diff --git a/doc/source/tutorial-quickstart-pytorch-lightning.rst b/doc/source/tutorial-quickstart-pytorch-lightning.rst
index 7c74c9a1682f..089865a2969d 100644
--- a/doc/source/tutorial-quickstart-pytorch-lightning.rst
+++ b/doc/source/tutorial-quickstart-pytorch-lightning.rst
@@ -1,119 +1,118 @@
 .. 
_quickstart-pytorch-lightning:

-##############################
- Quickstart PyTorch Lightning
-##############################
+Quickstart PyTorch Lightning
+============================

-In this federated learning tutorial we will learn how to train an
-AutoEncoder model on MNIST using Flower and PyTorch Lightning. It is
-recommended to create a virtual environment and run everything within a
-:doc:`virtualenv `.
+In this federated learning tutorial we will learn how to train an AutoEncoder model on
+MNIST using Flower and PyTorch Lightning. It is recommended to create a virtual
+environment and run everything within a :doc:`virtualenv
+`.

 Then, clone the code example directly from GitHub:

-.. code:: shell
+.. code-block:: shell

-   git clone --depth=1 https://github.com/adap/flower.git _tmp \
-   && mv _tmp/examples/quickstart-pytorch-lightning . \
-   && rm -rf _tmp && cd quickstart-pytorch-lightning
+    git clone --depth=1 https://github.com/adap/flower.git _tmp \
+        && mv _tmp/examples/quickstart-pytorch-lightning . \
+        && rm -rf _tmp && cd quickstart-pytorch-lightning

-This will create a new directory called `quickstart-pytorch-lightning`
-containing the following files:
+This will create a new directory called `quickstart-pytorch-lightning` containing the
+following files:

-.. code:: shell
+.. code-block:: shell

-   quickstart-pytorch-lightning
-   ├── pytorchlightning_example
-   │   ├── client_app.py # Defines your ClientApp
-   │   ├── server_app.py # Defines your ServerApp
-   │   └── task.py # Defines your model, training and data loading
-   ├── pyproject.toml # Project metadata like dependencies and configs
-   └── README.md
+    quickstart-pytorch-lightning
+    ├── pytorchlightning_example
+    │   ├── client_app.py # Defines your ClientApp
+    │   ├── server_app.py # Defines your ServerApp
+    │   └── task.py # Defines your model, training and data loading
+    ├── pyproject.toml # Project metadata like dependencies and configs
+    └── README.md

 Next, activate your environment, then run:

-.. code:: shell
+.. code-block:: shell

-   # Navigate to the example directory
-   $ cd path/to/quickstart-pytorch-lightning
+    # Navigate to the example directory
+    $ cd path/to/quickstart-pytorch-lightning

-   # Install project and dependencies
-   $ pip install -e .
+    # Install project and dependencies
+    $ pip install -e .

-By default, Flower Simulation Engine will be started and it will create
-a federation of 4 nodes using `FedAvg
+By default, the Flower Simulation Engine will be started and it will create a
+federation of 4 nodes using `FedAvg
 `_
-as the aggregation strategy. The dataset will be partitioned using
-Flower Dataset's `IidPartitioner
+as the aggregation strategy. The dataset will be partitioned using Flower Dataset's
+`IidPartitioner
 `_.

 To run the project, do:

-.. code:: shell
+.. code-block:: shell

-   # Run with default arguments
-   $ flwr run .
+    # Run with default arguments
+    $ flwr run .

 With default arguments you will see an output like this one:

-.. code:: shell
-
-   Loading project configuration... 
- Success - INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout - INFO : - INFO : [INIT] - INFO : Using initial global parameters provided by strategy - INFO : Starting evaluation of initial global parameters - INFO : Evaluation returned no results (`None`) - INFO : - INFO : [ROUND 1] - INFO : configure_fit: strategy sampled 2 clients (out of 4) - INFO : aggregate_evaluate: received 2 results and 0 failures - WARNING : No evaluate_metrics_aggregation_fn provided - INFO : - INFO : [ROUND 2] - INFO : configure_fit: strategy sampled 2 clients (out of 4) - INFO : aggregate_fit: received 2 results and 0 failures - INFO : configure_evaluate: strategy sampled 2 clients (out of 4) - INFO : aggregate_evaluate: received 2 results and 0 failures - INFO : - INFO : [ROUND 3] - INFO : configure_fit: strategy sampled 2 clients (out of 4) - INFO : aggregate_fit: received 2 results and 0 failures - INFO : configure_evaluate: strategy sampled 2 clients (out of 4) - INFO : aggregate_evaluate: received 2 results and 0 failures - INFO : - INFO : [SUMMARY] - INFO : Run finished 3 round(s) in 136.92s - INFO : History (loss, distributed): - INFO : round 1: 0.04982871934771538 - INFO : round 2: 0.046457378193736076 - INFO : round 3: 0.04506748169660568 - INFO : - -Each simulated `ClientApp` (two per round) will also log a summary of -their local training process. Expect this output to be similar to: - -.. code:: shell - - # The left part indicates the process ID running the `ClientApp` - (ClientAppActor pid=38155) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - (ClientAppActor pid=38155) ┃ Test metric ┃ DataLoader 0 ┃ - (ClientAppActor pid=38155) ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ - (ClientAppActor pid=38155) │ test_loss │ 0.045175597071647644 │ - (ClientAppActor pid=38155) └───────────────────────────┴───────────────────────────┘ - -You can also override the parameters defined in the -``[tool.flwr.app.config]`` section in ``pyproject.toml`` like this: - -.. code:: shell - - # Override some arguments - $ flwr run . --run-config num-server-rounds=5 +.. code-block:: shell + + Loading project configuration... 
+ Success + INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout + INFO : + INFO : [INIT] + INFO : Using initial global parameters provided by strategy + INFO : Starting evaluation of initial global parameters + INFO : Evaluation returned no results (`None`) + INFO : + INFO : [ROUND 1] + INFO : configure_fit: strategy sampled 2 clients (out of 4) + INFO : aggregate_evaluate: received 2 results and 0 failures + WARNING : No evaluate_metrics_aggregation_fn provided + INFO : + INFO : [ROUND 2] + INFO : configure_fit: strategy sampled 2 clients (out of 4) + INFO : aggregate_fit: received 2 results and 0 failures + INFO : configure_evaluate: strategy sampled 2 clients (out of 4) + INFO : aggregate_evaluate: received 2 results and 0 failures + INFO : + INFO : [ROUND 3] + INFO : configure_fit: strategy sampled 2 clients (out of 4) + INFO : aggregate_fit: received 2 results and 0 failures + INFO : configure_evaluate: strategy sampled 2 clients (out of 4) + INFO : aggregate_evaluate: received 2 results and 0 failures + INFO : + INFO : [SUMMARY] + INFO : Run finished 3 round(s) in 136.92s + INFO : History (loss, distributed): + INFO : round 1: 0.04982871934771538 + INFO : round 2: 0.046457378193736076 + INFO : round 3: 0.04506748169660568 + INFO : + +Each simulated `ClientApp` (two per round) will also log a summary of their local +training process. Expect this output to be similar to: + +.. code-block:: shell + + # The left part indicates the process ID running the `ClientApp` + (ClientAppActor pid=38155) ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + (ClientAppActor pid=38155) ┃ Test metric ┃ DataLoader 0 ┃ + (ClientAppActor pid=38155) ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + (ClientAppActor pid=38155) │ test_loss │ 0.045175597071647644 │ + (ClientAppActor pid=38155) └───────────────────────────┴───────────────────────────┘ + +You can also override the parameters defined in the ``[tool.flwr.app.config]`` section +in ``pyproject.toml`` like this: + +.. code-block:: shell + + # Override some arguments + $ flwr run . --run-config num-server-rounds=5 .. note:: - Check the `source code - `_ - of this tutorial in ``examples/quickstart-pytorch-lightning`` in the - Flower GitHub repository. + Check the `source code + `_ + of this tutorial in ``examples/quickstart-pytorch-lightning`` in the Flower GitHub + repository. diff --git a/doc/source/tutorial-quickstart-pytorch.rst b/doc/source/tutorial-quickstart-pytorch.rst index d00b9efbe16b..6b99e378d086 100644 --- a/doc/source/tutorial-quickstart-pytorch.rst +++ b/doc/source/tutorial-quickstart-pytorch.rst @@ -1,384 +1,366 @@ .. _quickstart-pytorch: -#################### - Quickstart PyTorch -#################### - -In this federated learning tutorial we will learn how to train a -Convolutional Neural Network on CIFAR-10 using Flower and PyTorch. It is -recommended to create a virtual environment and run everything within a -:doc:`virtualenv `. - -Let's use `flwr new` to create a complete Flower+PyTorch project. It -will generate all the files needed to run, by default with the Flower -Simulation Engine, a federation of 10 nodes using `FedAvg +Quickstart PyTorch +================== + +In this federated learning tutorial we will learn how to train a Convolutional Neural +Network on CIFAR-10 using Flower and PyTorch. It is recommended to create a virtual +environment and run everything within a :doc:`virtualenv +`. + +Let's use `flwr new` to create a complete Flower+PyTorch project. 
It will generate all +the files needed to run, by default with the Flower Simulation Engine, a federation of +10 nodes using `FedAvg `_. The dataset will be partitioned using Flower Dataset's `IidPartitioner `_. -Now that we have a rough idea of what this example is about, let's get -started. First, install Flower in your new environment: +Now that we have a rough idea of what this example is about, let's get started. First, +install Flower in your new environment: -.. code:: shell +.. code-block:: shell - # In a new Python environment - $ pip install flwr + # In a new Python environment + $ pip install flwr -Then, run the command below. You will be prompted to select one of the -available templates (choose ``PyTorch``), give a name to your project, -and type in your developer name: +Then, run the command below. You will be prompted to select one of the available +templates (choose ``PyTorch``), give a name to your project, and type in your developer +name: -.. code:: shell +.. code-block:: shell - $ flwr new + $ flwr new -After running it you'll notice a new directory with your project name -has been created. It should have the following structure: +After running it you'll notice a new directory with your project name has been created. +It should have the following structure: -.. code:: shell +.. code-block:: shell - - ├── - │ ├── __init__.py - │ ├── client_app.py # Defines your ClientApp - │ ├── server_app.py # Defines your ServerApp - │ └── task.py # Defines your model, training and data loading - ├── pyproject.toml # Project metadata like dependencies and configs - └── README.md + + ├── + │ ├── __init__.py + │ ├── client_app.py # Defines your ClientApp + │ ├── server_app.py # Defines your ServerApp + │ └── task.py # Defines your model, training and data loading + ├── pyproject.toml # Project metadata like dependencies and configs + └── README.md -If you haven't yet installed the project and its dependencies, you can -do so by: +If you haven't yet installed the project and its dependencies, you can do so by: -.. code:: shell +.. code-block:: shell - # From the directory where your pyproject.toml is - $ pip install -e . + # From the directory where your pyproject.toml is + $ pip install -e . To run the project, do: -.. code:: shell +.. code-block:: shell - # Run with default arguments - $ flwr run . + # Run with default arguments + $ flwr run . With default arguments you will see an output like this one: -.. code:: shell - - Loading project configuration... - Success - WARNING : FAB ID is not provided; the default ClientApp will be loaded. 
- INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout - INFO : - INFO : [INIT] - INFO : Using initial global parameters provided by strategy - INFO : Evaluating initial global parameters - INFO : - INFO : [ROUND 1] - INFO : configure_fit: strategy sampled 5 clients (out of 10) - INFO : aggregate_fit: received 5 results and 0 failures - WARNING : No fit_metrics_aggregation_fn provided - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - WARNING : No evaluate_metrics_aggregation_fn provided - INFO : - INFO : [ROUND 2] - INFO : configure_fit: strategy sampled 5 clients (out of 10) - INFO : aggregate_fit: received 5 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [ROUND 3] - INFO : configure_fit: strategy sampled 5 clients (out of 10) - INFO : aggregate_fit: received 5 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [SUMMARY] - INFO : Run finished 3 round(s) in 21.35s - INFO : History (loss, distributed): - INFO : round 1: 2.2978184528648855 - INFO : round 2: 2.173852103948593 - INFO : round 3: 2.039920600131154 - INFO : - -You can also override the parameters defined in the -``[tool.flwr.app.config]`` section in ``pyproject.toml`` like this: - -.. code:: shell - - # Override some arguments - $ flwr run . --run-config "num-server-rounds=5 local-epochs=3" - -What follows is an explanation of each component in the project you just -created: dataset partition, the model, defining the ``ClientApp`` and -defining the ``ServerApp``. - -********** - The Data -********** - -This tutorial uses `Flower Datasets `_ -to easily download and partition the `CIFAR-10` dataset. In this example -you'll make use of the `IidPartitioner +.. code-block:: shell + + Loading project configuration... + Success + WARNING : FAB ID is not provided; the default ClientApp will be loaded. 
+ INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout + INFO : + INFO : [INIT] + INFO : Using initial global parameters provided by strategy + INFO : Evaluating initial global parameters + INFO : + INFO : [ROUND 1] + INFO : configure_fit: strategy sampled 5 clients (out of 10) + INFO : aggregate_fit: received 5 results and 0 failures + WARNING : No fit_metrics_aggregation_fn provided + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + WARNING : No evaluate_metrics_aggregation_fn provided + INFO : + INFO : [ROUND 2] + INFO : configure_fit: strategy sampled 5 clients (out of 10) + INFO : aggregate_fit: received 5 results and 0 failures + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + INFO : + INFO : [ROUND 3] + INFO : configure_fit: strategy sampled 5 clients (out of 10) + INFO : aggregate_fit: received 5 results and 0 failures + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + INFO : + INFO : [SUMMARY] + INFO : Run finished 3 round(s) in 21.35s + INFO : History (loss, distributed): + INFO : round 1: 2.2978184528648855 + INFO : round 2: 2.173852103948593 + INFO : round 3: 2.039920600131154 + INFO : + +You can also override the parameters defined in the ``[tool.flwr.app.config]`` section +in ``pyproject.toml`` like this: + +.. code-block:: shell + + # Override some arguments + $ flwr run . --run-config "num-server-rounds=5 local-epochs=3" + +What follows is an explanation of each component in the project you just created: +dataset partition, the model, defining the ``ClientApp`` and defining the ``ServerApp``. + +The Data +-------- + +This tutorial uses `Flower Datasets `_ to easily +download and partition the `CIFAR-10` dataset. In this example you'll make use of the +`IidPartitioner `_ -to generate `num_partitions` partitions. You can choose `other -partitioners -`_ -available in Flower Datasets. Each ``ClientApp`` will call this function -to create dataloaders with the data that correspond to their data -partition. - -.. code:: python - - partitioner = IidPartitioner(num_partitions=num_partitions) - fds = FederatedDataset( - dataset="uoft-cs/cifar10", - partitioners={"train": partitioner}, - ) - partition = fds.load_partition(partition_id) - # Divide data on each node: 80% train, 20% test - partition_train_test = partition.train_test_split(test_size=0.2, seed=42) - pytorch_transforms = Compose([ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) - - - def apply_transforms(batch): - """Apply transforms to the partition from FederatedDataset.""" - batch["img"] = [pytorch_transforms(img) for img in batch["img"]] - return batch - - - partition_train_test = partition_train_test.with_transform(apply_transforms) - trainloader = DataLoader(partition_train_test["train"], batch_size=32, shuffle=True) - testloader = DataLoader(partition_train_test["test"], batch_size=32) - -*********** - The Model -*********** - -We defined a simple Convolutional Neural Network (CNN), but feel free to -replace it with a more sophisticated model if you'd like: - -.. 
code:: python - - class Net(nn.Module): - """Model (simple CNN adapted from 'PyTorch: A 60 Minute Blitz')""" - - def __init__(self): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(3, 6, 5) - self.pool = nn.MaxPool2d(2, 2) - self.conv2 = nn.Conv2d(6, 16, 5) - self.fc1 = nn.Linear(16 * 5 * 5, 120) - self.fc2 = nn.Linear(120, 84) - self.fc3 = nn.Linear(84, 10) - - def forward(self, x): - x = self.pool(F.relu(self.conv1(x))) - x = self.pool(F.relu(self.conv2(x))) - x = x.view(-1, 16 * 5 * 5) - x = F.relu(self.fc1(x)) - x = F.relu(self.fc2(x)) - return self.fc3(x) - -In addition to defining the model architecture, we also include two -utility functions to perform both training (i.e. ``train()``) and -evaluation (i.e. ``test()``) using the above model. These functions -should look fairly familiar if you have some prior experience with -PyTorch. Note these functions do not have anything specific to Flower. -That being said, the training function will normally be called, as we'll -see later, from a Flower client passing its own data. In summary, your -clients can use standard training/testing functions to perform local -training or evaluation: - -.. code:: python - - def train(net, trainloader, epochs, device): - """Train the model on the training set.""" - net.to(device) # move model to GPU if available - criterion = torch.nn.CrossEntropyLoss().to(device) - optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9) - net.train() - running_loss = 0.0 - for _ in range(epochs): - for batch in trainloader: - images = batch["img"] - labels = batch["label"] - optimizer.zero_grad() - loss = criterion(net(images.to(device)), labels.to(device)) - loss.backward() - optimizer.step() - running_loss += loss.item() - - avg_trainloss = running_loss / len(trainloader) - return avg_trainloss - - - def test(net, testloader, device): - """Validate the model on the test set.""" - net.to(device) - criterion = torch.nn.CrossEntropyLoss() - correct, loss = 0, 0.0 - with torch.no_grad(): - for batch in testloader: - images = batch["img"].to(device) - labels = batch["label"].to(device) - outputs = net(images) - loss += criterion(outputs, labels).item() - correct += (torch.max(outputs.data, 1)[1] == labels).sum().item() - accuracy = correct / len(testloader.dataset) - return loss, accuracy - -*************** - The ClientApp -*************** - -The main changes we have to make to use `PyTorch` with `Flower` will be -found in the ``get_weights()`` and ``set_weights()`` functions. In -``get_weights()`` PyTorch model parameters are extracted and represented -as a list of NumPy arrays. The ``set_weights()`` function that's the -oposite: given a list of NumPy arrays it applies them to an existing -PyTorch model. Doing this in fairly easy in PyTorch. +to generate `num_partitions` partitions. You can choose `other partitioners +`_ available in +Flower Datasets. Each ``ClientApp`` will call this function to create dataloaders with +the data that correspond to their data partition. + +.. 
code-block:: python + + partitioner = IidPartitioner(num_partitions=num_partitions) + fds = FederatedDataset( + dataset="uoft-cs/cifar10", + partitioners={"train": partitioner}, + ) + partition = fds.load_partition(partition_id) + # Divide data on each node: 80% train, 20% test + partition_train_test = partition.train_test_split(test_size=0.2, seed=42) + pytorch_transforms = Compose([ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + + + def apply_transforms(batch): + """Apply transforms to the partition from FederatedDataset.""" + batch["img"] = [pytorch_transforms(img) for img in batch["img"]] + return batch + + + partition_train_test = partition_train_test.with_transform(apply_transforms) + trainloader = DataLoader(partition_train_test["train"], batch_size=32, shuffle=True) + testloader = DataLoader(partition_train_test["test"], batch_size=32) + +The Model +--------- + +We defined a simple Convolutional Neural Network (CNN), but feel free to replace it with +a more sophisticated model if you'd like: + +.. code-block:: python + + class Net(nn.Module): + """Model (simple CNN adapted from 'PyTorch: A 60 Minute Blitz')""" + + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + return self.fc3(x) + +In addition to defining the model architecture, we also include two utility functions to +perform both training (i.e. ``train()``) and evaluation (i.e. ``test()``) using the +above model. These functions should look fairly familiar if you have some prior +experience with PyTorch. Note these functions do not have anything specific to Flower. +That being said, the training function will normally be called, as we'll see later, from +a Flower client passing its own data. In summary, your clients can use standard +training/testing functions to perform local training or evaluation: + +.. code-block:: python + + def train(net, trainloader, epochs, device): + """Train the model on the training set.""" + net.to(device) # move model to GPU if available + criterion = torch.nn.CrossEntropyLoss().to(device) + optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9) + net.train() + running_loss = 0.0 + for _ in range(epochs): + for batch in trainloader: + images = batch["img"] + labels = batch["label"] + optimizer.zero_grad() + loss = criterion(net(images.to(device)), labels.to(device)) + loss.backward() + optimizer.step() + running_loss += loss.item() + + avg_trainloss = running_loss / len(trainloader) + return avg_trainloss + + + def test(net, testloader, device): + """Validate the model on the test set.""" + net.to(device) + criterion = torch.nn.CrossEntropyLoss() + correct, loss = 0, 0.0 + with torch.no_grad(): + for batch in testloader: + images = batch["img"].to(device) + labels = batch["label"].to(device) + outputs = net(images) + loss += criterion(outputs, labels).item() + correct += (torch.max(outputs.data, 1)[1] == labels).sum().item() + accuracy = correct / len(testloader.dataset) + return loss, accuracy + +The ClientApp +------------- + +The main changes we have to make to use `PyTorch` with `Flower` will be found in the +``get_weights()`` and ``set_weights()`` functions. 
In ``get_weights()`` PyTorch model
+parameters are extracted and represented as a list of NumPy arrays. The
+``set_weights()`` function does the opposite: given a list of NumPy arrays, it applies
+them to an existing PyTorch model. Doing this is fairly easy in PyTorch.

 .. note::

-   The specific implementation of ``get_weights()`` and
-   ``set_weights()`` depends on the type of models you use. The ones
-   shown below work for a wide range of PyTorch models but you might
-   need to adjust them if you have more exotic model architectures.
-
-.. code:: python
-
-   def get_weights(net):
-       return [val.cpu().numpy() for _, val in net.state_dict().items()]
-
-
-   def set_weights(net, parameters):
-       params_dict = zip(net.state_dict().keys(), parameters)
-       state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
-       net.load_state_dict(state_dict, strict=True)
-
-The rest of the functionality is directly inspired by the centralized
-case. The ``fit()`` method in the client trains the model using the
-local dataset. Similarly, the ``evaluate()`` method is used to evaluate
-the model received on a held-out validation set that the client might
-have:
+    The specific implementation of ``get_weights()`` and ``set_weights()`` depends on
+    the type of models you use. The ones shown below work for a wide range of PyTorch
+    models but you might need to adjust them if you have more exotic model
+    architectures.
+
+.. code-block:: python
+
+    def get_weights(net):
+        return [val.cpu().numpy() for _, val in net.state_dict().items()]
+
+
+    def set_weights(net, parameters):
+        params_dict = zip(net.state_dict().keys(), parameters)
+        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
+        net.load_state_dict(state_dict, strict=True)
+
+The rest of the functionality is directly inspired by the centralized case. The
+``fit()`` method in the client trains the model using the local dataset. Similarly, the
+``evaluate()`` method is used to evaluate the model received on a held-out validation
+set that the client might have:
+
+.. 
code-block:: python
+
+    class FlowerClient(NumPyClient):
+        def __init__(self, net, trainloader, valloader, local_epochs):
+            self.net = net
+            self.trainloader = trainloader
+            self.valloader = valloader
+            self.local_epochs = local_epochs
+            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+            self.net.to(self.device)
+
+        def fit(self, parameters, config):
+            set_weights(self.net, parameters)
+            results = train(
+                self.net,
+                self.trainloader,
+                self.valloader,
+                self.local_epochs,
+                self.device,
+            )
+            return get_weights(self.net), len(self.trainloader.dataset), results
+
+        def evaluate(self, parameters, config):
+            set_weights(self.net, parameters)
+            loss, accuracy = test(self.net, self.valloader, self.device)
+            return loss, len(self.valloader.dataset), {"accuracy": accuracy}
+
+Finally, we can construct a ``ClientApp`` using the ``FlowerClient`` defined above by
+means of a ``client_fn()`` callback. Note that the `context` enables you to get access
+to hyperparameters defined in your ``pyproject.toml`` to configure the run. In this
+tutorial we access the `local-epochs` setting to control the number of epochs a
+``ClientApp`` will perform when running the ``fit()`` method. You could define
 additional hyperparameters in ``pyproject.toml`` and access them here.

-.. code:: python
+.. code-block:: python

-   def client_fn(context: Context):
-       # Load model and data
-       net = Net()
-       partition_id = context.node_config["partition-id"]
-       num_partitions = context.node_config["num-partitions"]
-       trainloader, valloader = load_data(partition_id, num_partitions)
-       local_epochs = context.run_config["local-epochs"]
+    def client_fn(context: Context):
+        # Load model and data
+        net = Net()
+        partition_id = context.node_config["partition-id"]
+        num_partitions = context.node_config["num-partitions"]
+        trainloader, valloader = load_data(partition_id, num_partitions)
+        local_epochs = context.run_config["local-epochs"]

-       # Return Client instance
-       return FlowerClient(net, trainloader, valloader, local_epochs).to_client()
+        # Return Client instance
+        return FlowerClient(net, trainloader, valloader, local_epochs).to_client()

-   # Flower ClientApp
-   app = ClientApp(client_fn)
+    # Flower ClientApp
+    app = ClientApp(client_fn)

-***************
- The ServerApp
-***************
+The ServerApp
+-------------

-To construct a ``ServerApp`` we define a ``server_fn()`` callback with
-an identical signature to that of ``client_fn()`` but the return type is
-`ServerAppComponents
+To construct a ``ServerApp`` we define a ``server_fn()`` callback with an identical
+signature to that of ``client_fn()`` but the return type is `ServerAppComponents
 `_ as opposed to a `Client
-`_.
-In this example we use the `FedAvg`. To it we pass a randomly
-initialized model that will server as the global model to federated.
-Note that the value of ``fraction_fit`` is read from the run config. You
-can find the default value defined in the ``pyproject.toml``.
+`_. In this
+example we use the `FedAvg` strategy. To it we pass a randomly initialized model that
+will serve as the global model to federate. Note that the value of ``fraction_fit`` is
+read from the run config. You can find the default value defined in the
+``pyproject.toml``.

-.. code:: python
+.. 
code-block:: python

-   def server_fn(context: Context):
-       # Read from config
-       num_rounds = context.run_config["num-server-rounds"]
-       fraction_fit = context.run_config["fraction-fit"]
+    def server_fn(context: Context):
+        # Read from config
+        num_rounds = context.run_config["num-server-rounds"]
+        fraction_fit = context.run_config["fraction-fit"]

-       # Initialize model parameters
-       ndarrays = get_weights(Net())
-       parameters = ndarrays_to_parameters(ndarrays)
+        # Initialize model parameters
+        ndarrays = get_weights(Net())
+        parameters = ndarrays_to_parameters(ndarrays)

-       # Define strategy
-       strategy = FedAvg(
-           fraction_fit=fraction_fit,
-           fraction_evaluate=1.0,
-           min_available_clients=2,
-           initial_parameters=parameters,
-       )
-       config = ServerConfig(num_rounds=num_rounds)
+        # Define strategy
+        strategy = FedAvg(
+            fraction_fit=fraction_fit,
+            fraction_evaluate=1.0,
+            min_available_clients=2,
+            initial_parameters=parameters,
+        )
+        config = ServerConfig(num_rounds=num_rounds)

-       return ServerAppComponents(strategy=strategy, config=config)
+        return ServerAppComponents(strategy=strategy, config=config)

-   # Create ServerApp
-   app = ServerApp(server_fn=server_fn)
+    # Create ServerApp
+    app = ServerApp(server_fn=server_fn)

-Congratulations! You've successfully built and run your first federated
-learning system.
+Congratulations! You've successfully built and run your first federated learning system.

 .. note::

-   Check the `source code
-   `_
-   of the extended version of this tutorial in
-   ``examples/quickstart-pytorch`` in the Flower GitHub repository.
+    Check the `source code
+    `_ of the
+    extended version of this tutorial in ``examples/quickstart-pytorch`` in the Flower
+    GitHub repository.

-****************
- Video tutorial
-****************
+Video tutorial
+--------------

 .. note::

-   The video shown below shows how to setup a PyTorch + Flower project
-   using our previously recommended APIs. A new video tutorial will be
-   released that shows the new APIs (as the content above does)
+    The video below shows how to set up a PyTorch + Flower project using our
+    previously recommended APIs. A new video tutorial will be released that shows the
+    new APIs (as the content above does).

 .. meta::
-   :description: Check out this Federated Learning quickstart tutorial for using Flower with PyTorch to train a CNN model on MNIST.
+    :description: Check out this Federated Learning quickstart tutorial for using Flower with PyTorch to train a CNN model on MNIST.

 .. youtube:: jOmmuzMIQ4c
-   :width: 100%
+    :width: 100%
diff --git a/doc/source/tutorial-quickstart-scikitlearn.rst b/doc/source/tutorial-quickstart-scikitlearn.rst
index fc3b58925c06..56bdf18cad17 100644
--- a/doc/source/tutorial-quickstart-scikitlearn.rst
+++ b/doc/source/tutorial-quickstart-scikitlearn.rst
@@ -1,77 +1,89 @@
 .. _quickstart-scikitlearn:

-
 Quickstart scikit-learn
 =======================

 .. meta::
-    :description: Check out this Federated Learning quickstart tutorial for using Flower with scikit-learn to train a linear regression model.
+    :description: Check out this Federated Learning quickstart tutorial for using Flower with scikit-learn to train a logistic regression model.

-In this tutorial, we will learn how to train a :code:`Logistic Regression` model on MNIST using Flower and scikit-learn.
+In this tutorial, we will learn how to train a ``Logistic Regression`` model on MNIST
+using Flower and scikit-learn.

-It is recommended to create a virtual environment and run everything within this :doc:`virtualenv `. 
+It is recommended to create a virtual environment and run everything within this
+:doc:`virtualenv `.

 Our example consists of one *server* and two *clients* all having the same model.

-*Clients* are responsible for generating individual model parameter updates for the model based on their local datasets.
-These updates are then sent to the *server* which will aggregate them to produce an updated global model. Finally, the *server* sends this improved version of the model back to each *client*.
-A complete cycle of parameters updates is called a *round*.
+*Clients* are responsible for generating individual model parameter updates for the
+model based on their local datasets. These updates are then sent to the *server* which
+will aggregate them to produce an updated global model. Finally, the *server* sends this
+improved version of the model back to each *client*. A complete cycle of parameter
+updates is called a *round*.

-Now that we have a rough idea of what is going on, let's get started. We first need to install Flower. You can do this by running:
+Now that we have a rough idea of what is going on, let's get started. We first need to
+install Flower. You can do this by running:

 .. code-block:: shell

-  $ pip install flwr
+    $ pip install flwr

 Since we want to use scikit-learn, let's go ahead and install it:

 .. code-block:: shell

-  $ pip install scikit-learn
+    $ pip install scikit-learn

 Or simply install all dependencies using Poetry:

 .. code-block:: shell

-  $ poetry install
-
+    $ poetry install

 Flower Client
 -------------

-Now that we have all our dependencies installed, let's run a simple distributed training with two clients and one server.
-However, before setting up the client and server, we will define all functionalities that we need for our federated learning setup within :code:`utils.py`. The :code:`utils.py` contains different functions defining all the machine learning basics:
+Now that we have all our dependencies installed, let's run a simple distributed training
+with two clients and one server. However, before setting up the client and server, we
+will define all functionalities that we need for our federated learning setup within
+``utils.py``. The ``utils.py`` contains different functions defining all the machine
+learning basics:

-* :code:`get_model_parameters()`
-  * Returns the parameters of a :code:`sklearn` LogisticRegression model
-* :code:`set_model_params()`
-  * Sets the parameters of a :code:`sklearn` LogisticRegression model
-* :code:`set_initial_params()`
-  * Initializes the model parameters that the Flower server will ask for
+- ``get_model_parameters()``
+  - Returns the parameters of a ``sklearn`` LogisticRegression model
+- ``set_model_params()``
+  - Sets the parameters of a ``sklearn`` LogisticRegression model
+- ``set_initial_params()``
+  - Initializes the model parameters that the Flower server will ask for

-Please check out :code:`utils.py` `here `_ for more details.
-The pre-defined functions are used in the :code:`client.py` and imported. The :code:`client.py` also requires to import several packages such as Flower and scikit-learn:
+Please check out ``utils.py`` `here
+`_ for
+more details. The pre-defined functions are imported and used in ``client.py``. The
+``client.py`` also needs to import several packages such as Flower and scikit-learn:

.. 
code-block:: python - import argparse - import warnings - - from sklearn.linear_model import LogisticRegression - from sklearn.metrics import log_loss - - import flwr as fl - import utils - from flwr_datasets import FederatedDataset + import argparse + import warnings + + from sklearn.linear_model import LogisticRegression + from sklearn.metrics import log_loss + + import flwr as fl + import utils + from flwr_datasets import FederatedDataset -Prior to local training, we need to load the MNIST dataset, a popular image classification dataset of handwritten digits for machine learning, and partition the dataset for FL. This can be conveniently achieved using `Flower Datasets `_. -The :code:`FederatedDataset.load_partition()` method loads the partitioned training set for each partition ID defined in the :code:`--partition-id` argument. +Prior to local training, we need to load the MNIST dataset, a popular image +classification dataset of handwritten digits for machine learning, and partition the +dataset for FL. This can be conveniently achieved using `Flower Datasets +`_. The ``FederatedDataset.load_partition()`` method +loads the partitioned training set for each partition ID defined in the +``--partition-id`` argument. .. code-block:: python if __name__ == "__main__": N_CLIENTS = 10 - + parser = argparse.ArgumentParser(description="Flower") parser.add_argument( "--partition-id", @@ -82,17 +94,17 @@ The :code:`FederatedDataset.load_partition()` method loads the partitioned train ) args = parser.parse_args() partition_id = args.partition_id - + fds = FederatedDataset(dataset="mnist", partitioners={"train": N_CLIENTS}) - + dataset = fds.load_partition(partition_id, "train").with_format("numpy") X, y = dataset["image"].reshape((len(dataset), -1)), dataset["label"] - + X_train, X_test = X[: int(0.8 * len(X))], X[int(0.8 * len(X)) :] y_train, y_test = y[: int(0.8 * len(y))], y[int(0.8 * len(y)) :] - -Next, the logistic regression model is defined and initialized with :code:`utils.set_initial_params()`. +Next, the logistic regression model is defined and initialized with +``utils.set_initial_params()``. .. code-block:: python @@ -104,28 +116,27 @@ Next, the logistic regression model is defined and initialized with :code:`utils utils.set_initial_params(model) -The Flower server interacts with clients through an interface called -:code:`Client`. When the server selects a particular client for training, it -sends training instructions over the network. The client receives those -instructions and calls one of the :code:`Client` methods to run your code -(i.e., to fit the logistic regression we defined earlier). - -Flower provides a convenience class called :code:`NumPyClient` which makes it -easier to implement the :code:`Client` interface when your workload uses scikit-learn. -Implementing :code:`NumPyClient` usually means defining the following methods -(:code:`set_parameters` is optional though): - -#. :code:`get_parameters` - * return the model weight as a list of NumPy ndarrays -#. :code:`set_parameters` (optional) - * update the local model weights with the parameters received from the server - * is directly imported with :code:`utils.set_model_params()` -#. :code:`fit` - * set the local model weights - * train the local model - * return the updated local model weights -#. :code:`evaluate` - * test the local model +The Flower server interacts with clients through an interface called ``Client``. 
When
+the server selects a particular client for training, it sends training instructions over
+the network. The client receives those instructions and calls one of the ``Client``
+methods to run your code (i.e., to fit the logistic regression we defined earlier).
+
+Flower provides a convenience class called ``NumPyClient`` which makes it easier to
+implement the ``Client`` interface when your workload uses scikit-learn. Implementing
+``NumPyClient`` usually means defining the following methods (``set_parameters`` is
+optional though):
+
+1. ``get_parameters``
+   - return the model weights as a list of NumPy ndarrays
+2. ``set_parameters`` (optional)
+   - update the local model weights with the parameters received from the server
+   - is directly imported with ``utils.set_model_params()``
+3. ``fit``
+   - set the local model weights
+   - train the local model
+   - return the updated local model weights
+4. ``evaluate``
+   - test the local model

 The methods can be implemented in the following way:

@@ -149,27 +160,29 @@ The methods can be implemented in the following way:
             accuracy = model.score(X_test, y_test)
             return loss, len(X_test), {"accuracy": accuracy}

-
-We can now create an instance of our class :code:`MnistClient` and add one line
-to actually run this client:
+We can now create an instance of our class ``MnistClient`` and add one line to actually
+run this client:

 .. code-block:: python

     fl.client.start_client("0.0.0.0:8080", client=MnistClient().to_client())

-That's it for the client. We only have to implement :code:`Client` or
-:code:`NumPyClient` and call :code:`fl.client.start_client()`. If you implement a client of type :code:`NumPyClient` you'll need to first call its :code:`to_client()` method. The string :code:`"0.0.0.0:8080"` tells the client which server to connect to. In our case we can run the server and the client on the same machine, therefore we use
-:code:`"0.0.0.0:8080"`. If we run a truly federated workload with the server and
-clients running on different machines, all that needs to change is the
-:code:`server_address` we pass to the client.
+That's it for the client. We only have to implement ``Client`` or ``NumPyClient`` and
+call ``fl.client.start_client()``. If you implement a client of type ``NumPyClient``
+you'll need to first call its ``to_client()`` method. The string ``"0.0.0.0:8080"``
+tells the client which server to connect to. In our case we can run the server and the
+client on the same machine, therefore we use ``"0.0.0.0:8080"``. If we run a truly
+federated workload with the server and clients running on different machines, all that
+needs to change is the ``server_address`` we pass to the client.

 Flower Server
 -------------

-The following Flower server is a little bit more advanced and returns an evaluation function for the server-side evaluation.
-First, we import again all required libraries such as Flower and scikit-learn.
+The following Flower server is a little bit more advanced and returns an evaluation
+function for the server-side evaluation. First, we again import all required libraries,
+such as Flower and scikit-learn.

-:code:`server.py`, import Flower and start the server:
+In ``server.py``, we import Flower and start the server:

 .. code-block:: python

@@ -179,12 +192,14 @@ First, we import again all required libraries such as Flower and scikit-learn. 
     from sklearn.metrics import log_loss
     from sklearn.linear_model import LogisticRegression
     from typing import Dict
-
+
     from flwr_datasets import FederatedDataset

-The number of federated learning rounds is set in :code:`fit_round()` and the evaluation is defined in :code:`get_evaluate_fn()`.
-The evaluation function is called after each federated learning round and gives you information about loss and accuracy.
-Note that we also make use of Flower Datasets here to load the test split of the MNIST dataset for server-side evaluation.
+The number of federated learning rounds is set in ``fit_round()`` and the evaluation is
+defined in ``get_evaluate_fn()``. The evaluation function is called after each federated
+learning round and gives you information about loss and accuracy. Note that we also make
+use of Flower Datasets here to load the test split of the MNIST dataset for server-side
+evaluation.

 .. code-block:: python

@@ -210,7 +225,13 @@ Note that we also make use of Flower Datasets here to load the test split of the
             return evaluate

-The :code:`main` contains the server-side parameter initialization :code:`utils.set_initial_params()` as well as the aggregation strategy :code:`fl.server.strategy:FedAvg()`. The strategy is the default one, federated averaging (or FedAvg), with two clients and evaluation after each federated learning round. The server can be started with the command :code:`fl.server.start_server(server_address="0.0.0.0:8080", strategy=strategy, config=fl.server.ServerConfig(num_rounds=3))`.
+The ``main`` function contains the server-side parameter initialization
+``utils.set_initial_params()`` as well as the aggregation strategy
+``fl.server.strategy.FedAvg()``. The strategy is the default one, federated averaging
+(or FedAvg), with two clients and evaluation after each federated learning round. The
+server can be started with the command
+``fl.server.start_server(server_address="0.0.0.0:8080", strategy=strategy,
+config=fl.server.ServerConfig(num_rounds=3))``.

 .. code-block:: python

@@ -223,21 +244,25 @@ The :code:`main` contains the server-side parameter initialization :code:`utils.
             evaluate_fn=get_evaluate_fn(model),
             on_fit_config_fn=fit_round,
         )
-    fl.server.start_server(server_address="0.0.0.0:8080", strategy=strategy, config=fl.server.ServerConfig(num_rounds=3))
-
+    fl.server.start_server(
+        server_address="0.0.0.0:8080",
+        strategy=strategy,
+        config=fl.server.ServerConfig(num_rounds=3),
+    )

 Train the model, federated!
 ---------------------------

-With both client and server ready, we can now run everything and see federated
-learning in action. Federated learning systems usually have a server and multiple clients. We, therefore, have to start the server first:
+With both client and server ready, we can now run everything and see federated learning
+in action. Federated learning systems usually have a server and multiple clients. We,
+therefore, have to start the server first:

 .. code-block:: shell

     $ python3 server.py

-Once the server is running we can start the clients in different terminals.
-Open a new terminal and start the first client:
+Once the server is running we can start the clients in different terminals. Open a new
+terminal and start the first client:

 .. code-block:: shell

@@ -249,8 +274,8 @@ Open another terminal and start the second client:

     $ python3 client.py

-Each client will have its own dataset.
-You should now see how the training does in the very first terminal (the one that started the server):
+Each client will have its own dataset. 
You should now see how the training does in the +very first terminal (the one that started the server): .. code-block:: shell @@ -283,6 +308,7 @@ You should now see how the training does in the very first terminal (the one tha INFO flower 2022-01-13 13:43:21,232 | app.py:122 | app_evaluate: results [('ipv4:127.0.0.1:53980', EvaluateRes(loss=0.5843629240989685, num_examples=10000, accuracy=0.0, metrics={'accuracy': 0.8217})), ('ipv4:127.0.0.1:53982', EvaluateRes(loss=0.5843629240989685, num_examples=10000, accuracy=0.0, metrics={'accuracy': 0.8217}))] INFO flower 2022-01-13 13:43:21,232 | app.py:127 | app_evaluate: failures [] -Congratulations! -You've successfully built and run your first federated learning system. -The full `source code `_ for this example can be found in :code:`examples/sklearn-logreg-mnist`. +Congratulations! You've successfully built and run your first federated learning system. +The full `source code +`_ for this +example can be found in ``examples/sklearn-logreg-mnist``. diff --git a/doc/source/tutorial-quickstart-tensorflow.rst b/doc/source/tutorial-quickstart-tensorflow.rst index ffcd9efeb9bc..66cf69de6390 100644 --- a/doc/source/tutorial-quickstart-tensorflow.rst +++ b/doc/source/tutorial-quickstart-tensorflow.rst @@ -1,307 +1,290 @@ .. _quickstart-tensorflow: -####################### - Quickstart TensorFlow -####################### - -In this tutorial we will learn how to train a Convolutional Neural -Network on CIFAR-10 using the Flower framework and TensorFlow. First of -all, it is recommended to create a virtual environment and run -everything within a :doc:`virtualenv +Quickstart TensorFlow +===================== + +In this tutorial we will learn how to train a Convolutional Neural Network on CIFAR-10 +using the Flower framework and TensorFlow. First of all, it is recommended to create a +virtual environment and run everything within a :doc:`virtualenv `. -Let's use `flwr new` to create a complete Flower+TensorFlow project. It -will generate all the files needed to run, by default with the Flower -Simulation Engine, a federation of 10 nodes using `FedAvg +Let's use `flwr new` to create a complete Flower+TensorFlow project. It will generate +all the files needed to run, by default with the Flower Simulation Engine, a federation +of 10 nodes using `FedAvg `_. The dataset will be partitioned using Flower Dataset's `IidPartitioner `_. -Now that we have a rough idea of what this example is about, let's get -started. First, install Flower in your new environment: +Now that we have a rough idea of what this example is about, let's get started. First, +install Flower in your new environment: -.. code:: shell +.. code-block:: shell - # In a new Python environment - $ pip install flwr + # In a new Python environment + $ pip install flwr -Then, run the command below. You will be prompted to select one of the -available templates (choose ``TensorFlow``), give a name to your -project, and type in your developer name: +Then, run the command below. You will be prompted to select one of the available +templates (choose ``TensorFlow``), give a name to your project, and type in your +developer name: -.. code:: shell +.. code-block:: shell - $ flwr new + $ flwr new -After running it you'll notice a new directory with your project name -has been created. It should have the following structure: +After running it you'll notice a new directory with your project name has been created. +It should have the following structure: -.. code:: shell +.. 
code-block:: shell - - ├── - │ ├── __init__.py - │ ├── client_app.py # Defines your ClientApp - │ ├── server_app.py # Defines your ServerApp - │ └── task.py # Defines your model, training and data loading - ├── pyproject.toml # Project metadata like dependencies and configs - └── README.md + + ├── + │ ├── __init__.py + │ ├── client_app.py # Defines your ClientApp + │ ├── server_app.py # Defines your ServerApp + │ └── task.py # Defines your model, training and data loading + ├── pyproject.toml # Project metadata like dependencies and configs + └── README.md -If you haven't yet installed the project and its dependencies, you can -do so by: +If you haven't yet installed the project and its dependencies, you can do so by: -.. code:: shell +.. code-block:: shell - # From the directory where your pyproject.toml is - $ pip install -e . + # From the directory where your pyproject.toml is + $ pip install -e . To run the project, do: -.. code:: shell +.. code-block:: shell - # Run with default arguments - $ flwr run . + # Run with default arguments + $ flwr run . With default arguments you will see an output like this one: -.. code:: shell - - Loading project configuration... - Success - INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout - INFO : - INFO : [INIT] - INFO : Using initial global parameters provided by strategy - INFO : Starting evaluation of initial global parameters - INFO : Evaluation returned no results (`None`) - INFO : - INFO : [ROUND 1] - INFO : configure_fit: strategy sampled 10 clients (out of 10) - INFO : aggregate_fit: received 10 results and 0 failures - WARNING : No fit_metrics_aggregation_fn provided - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - WARNING : No evaluate_metrics_aggregation_fn provided - INFO : - INFO : [ROUND 2] - INFO : configure_fit: strategy sampled 10 clients (out of 10) - INFO : aggregate_fit: received 10 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [ROUND 3] - INFO : configure_fit: strategy sampled 10 clients (out of 10) - INFO : aggregate_fit: received 10 results and 0 failures - INFO : configure_evaluate: strategy sampled 10 clients (out of 10) - INFO : aggregate_evaluate: received 10 results and 0 failures - INFO : - INFO : [SUMMARY] - INFO : Run finished 3 round(s) in 31.31s - INFO : History (loss, distributed): - INFO : round 1: 1.9066195368766785 - INFO : round 2: 1.657227087020874 - INFO : round 3: 1.559039831161499 - INFO : - -You can also override the parameters defined in the -``[tool.flwr.app.config]`` section in ``pyproject.toml`` like this: - -.. code:: shell - - # Override some arguments - $ flwr run . --run-config "num-server-rounds=5 batch-size=16" - -********** - The Data -********** - -This tutorial uses `Flower Datasets `_ -to easily download and partition the `CIFAR-10` dataset. In this example -you'll make use of the `IidPartitioner +.. code-block:: shell + + Loading project configuration... 
+ Success + INFO : Starting Flower ServerApp, config: num_rounds=3, no round_timeout + INFO : + INFO : [INIT] + INFO : Using initial global parameters provided by strategy + INFO : Starting evaluation of initial global parameters + INFO : Evaluation returned no results (`None`) + INFO : + INFO : [ROUND 1] + INFO : configure_fit: strategy sampled 10 clients (out of 10) + INFO : aggregate_fit: received 10 results and 0 failures + WARNING : No fit_metrics_aggregation_fn provided + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + WARNING : No evaluate_metrics_aggregation_fn provided + INFO : + INFO : [ROUND 2] + INFO : configure_fit: strategy sampled 10 clients (out of 10) + INFO : aggregate_fit: received 10 results and 0 failures + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + INFO : + INFO : [ROUND 3] + INFO : configure_fit: strategy sampled 10 clients (out of 10) + INFO : aggregate_fit: received 10 results and 0 failures + INFO : configure_evaluate: strategy sampled 10 clients (out of 10) + INFO : aggregate_evaluate: received 10 results and 0 failures + INFO : + INFO : [SUMMARY] + INFO : Run finished 3 round(s) in 31.31s + INFO : History (loss, distributed): + INFO : round 1: 1.9066195368766785 + INFO : round 2: 1.657227087020874 + INFO : round 3: 1.559039831161499 + INFO : + +You can also override the parameters defined in the ``[tool.flwr.app.config]`` section +in ``pyproject.toml`` like this: + +.. code-block:: shell + + # Override some arguments + $ flwr run . --run-config "num-server-rounds=5 batch-size=16" + +The Data +-------- + +This tutorial uses `Flower Datasets `_ to easily +download and partition the `CIFAR-10` dataset. In this example you'll make use of the +`IidPartitioner `_ -to generate `num_partitions` partitions. You can choose `other -partitioners -`_ -available in Flower Datasets. Each ``ClientApp`` will call this function -to create the ``NumPy`` arrays that correspond to their data partition. - -.. code:: python - - partitioner = IidPartitioner(num_partitions=num_partitions) - fds = FederatedDataset( - dataset="uoft-cs/cifar10", - partitioners={"train": partitioner}, - ) - partition = fds.load_partition(partition_id, "train") - partition.set_format("numpy") - - # Divide data on each node: 80% train, 20% test - partition = partition.train_test_split(test_size=0.2) - x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"] - x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"] - -*********** - The Model -*********** - -Next, we need a model. We defined a simple Convolutional Neural Network -(CNN), but feel free to replace it with a more sophisticated model if -you'd like: - -.. 
code:: python - - def load_model(learning_rate: float = 0.001): - # Define a simple CNN for CIFAR-10 and set Adam optimizer - model = keras.Sequential( - [ - keras.Input(shape=(32, 32, 3)), - layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), - layers.MaxPooling2D(pool_size=(2, 2)), - layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), - layers.MaxPooling2D(pool_size=(2, 2)), - layers.Flatten(), - layers.Dropout(0.5), - layers.Dense(10, activation="softmax"), - ] - ) - model.compile( - "adam", - loss="sparse_categorical_crossentropy", - metrics=["accuracy"], - ) - return model - -*************** - The ClientApp -*************** - -With `TensorFlow`, we can use the built-in ``get_weights()`` and -``set_weights()`` functions, which simplifies the implementation with -`Flower`. The rest of the functionality in the ClientApp is directly -inspired by the centralized case. The ``fit()`` method in the client -trains the model using the local dataset. Similarly, the ``evaluate()`` -method is used to evaluate the model received on a held-out validation +to generate `num_partitions` partitions. You can choose `other partitioners +`_ available in +Flower Datasets. Each ``ClientApp`` will call this function to create the ``NumPy`` +arrays that correspond to their data partition. + +.. code-block:: python + + partitioner = IidPartitioner(num_partitions=num_partitions) + fds = FederatedDataset( + dataset="uoft-cs/cifar10", + partitioners={"train": partitioner}, + ) + partition = fds.load_partition(partition_id, "train") + partition.set_format("numpy") + + # Divide data on each node: 80% train, 20% test + partition = partition.train_test_split(test_size=0.2) + x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"] + x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"] + +The Model +--------- + +Next, we need a model. We defined a simple Convolutional Neural Network (CNN), but feel +free to replace it with a more sophisticated model if you'd like: + +.. code-block:: python + + def load_model(learning_rate: float = 0.001): + # Define a simple CNN for CIFAR-10 and set Adam optimizer + model = keras.Sequential( + [ + keras.Input(shape=(32, 32, 3)), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(10, activation="softmax"), + ] + ) + model.compile( + "adam", + loss="sparse_categorical_crossentropy", + metrics=["accuracy"], + ) + return model + +The ClientApp +------------- + +With `TensorFlow`, we can use the built-in ``get_weights()`` and ``set_weights()`` +functions, which simplifies the implementation with `Flower`. The rest of the +functionality in the ClientApp is directly inspired by the centralized case. The +``fit()`` method in the client trains the model using the local dataset. Similarly, the +``evaluate()`` method is used to evaluate the model received on a held-out validation set that the client might have: -.. 
code:: python - - class FlowerClient(NumPyClient): - def __init__(self, model, data, epochs, batch_size, verbose): - self.model = model - self.x_train, self.y_train, self.x_test, self.y_test = data - self.epochs = epochs - self.batch_size = batch_size - self.verbose = verbose - - def fit(self, parameters, config): - self.model.set_weights(parameters) - self.model.fit( - self.x_train, - self.y_train, - epochs=self.epochs, - batch_size=self.batch_size, - verbose=self.verbose, - ) - return self.model.get_weights(), len(self.x_train), {} - - def evaluate(self, parameters, config): - self.model.set_weights(parameters) - loss, accuracy = self.model.evaluate(self.x_test, self.y_test, verbose=0) - return loss, len(self.x_test), {"accuracy": accuracy} - -Finally, we can construct a ``ClientApp`` using the ``FlowerClient`` -defined above by means of a ``client_fn()`` callback. Note that the -`context` enables you to get access to hyperparameters defined in your -``pyproject.toml`` to configure the run. For example, in this tutorial -we access the `local-epochs` setting to control the number of epochs a -``ClientApp`` will perform when running the ``fit()`` method, in -addition to `batch-size`. You could define additional hyperparameters in -``pyproject.toml`` and access them here. - -.. code:: python - - def client_fn(context: Context): - # Load model and data - net = load_model() - - partition_id = context.node_config["partition-id"] - num_partitions = context.node_config["num-partitions"] - data = load_data(partition_id, num_partitions) - epochs = context.run_config["local-epochs"] - batch_size = context.run_config["batch-size"] - verbose = context.run_config.get("verbose") - - # Return Client instance - return FlowerClient( - net, data, epochs, batch_size, verbose - ).to_client() - - - # Flower ClientApp - app = ClientApp(client_fn=client_fn) - -*************** - The ServerApp -*************** - -To construct a ``ServerApp`` we define a ``server_fn()`` callback with -an identical signature to that of ``client_fn()`` but the return type is -`ServerAppComponents +.. code-block:: python + + class FlowerClient(NumPyClient): + def __init__(self, model, data, epochs, batch_size, verbose): + self.model = model + self.x_train, self.y_train, self.x_test, self.y_test = data + self.epochs = epochs + self.batch_size = batch_size + self.verbose = verbose + + def fit(self, parameters, config): + self.model.set_weights(parameters) + self.model.fit( + self.x_train, + self.y_train, + epochs=self.epochs, + batch_size=self.batch_size, + verbose=self.verbose, + ) + return self.model.get_weights(), len(self.x_train), {} + + def evaluate(self, parameters, config): + self.model.set_weights(parameters) + loss, accuracy = self.model.evaluate(self.x_test, self.y_test, verbose=0) + return loss, len(self.x_test), {"accuracy": accuracy} + +Finally, we can construct a ``ClientApp`` using the ``FlowerClient`` defined above by +means of a ``client_fn()`` callback. Note that the `context` enables you to get access +to hyperparameters defined in your ``pyproject.toml`` to configure the run. For example, +in this tutorial we access the `local-epochs` setting to control the number of epochs a +``ClientApp`` will perform when running the ``fit()`` method, in addition to +`batch-size`. You could define additional hyperparameters in ``pyproject.toml`` and +access them here. + +.. 
code-block:: python
+
+    def client_fn(context: Context):
+        # Load model and data
+        net = load_model()
+
+        partition_id = context.node_config["partition-id"]
+        num_partitions = context.node_config["num-partitions"]
+        data = load_data(partition_id, num_partitions)
+        epochs = context.run_config["local-epochs"]
+        batch_size = context.run_config["batch-size"]
+        verbose = context.run_config.get("verbose")
+
+        # Return Client instance
+        return FlowerClient(net, data, epochs, batch_size, verbose).to_client()
+
+
+    # Flower ClientApp
+    app = ClientApp(client_fn=client_fn)
+
+The ServerApp
+-------------
+
+To construct a ``ServerApp`` we define a ``server_fn()`` callback with an identical
+signature to that of ``client_fn()`` but the return type is `ServerAppComponents
 `_ as opposed to a `Client
-`_.
-In this example we use the `FedAvg`. To it we pass a randomly
-initialized model that will serve as the global model to federate.
+`_. In this
+example we use the `FedAvg` strategy. To it we pass a randomly initialized model that
+will serve as the global model to federate.

-.. code:: python
+.. code-block:: python

-   def server_fn(context: Context):
-       # Read from config
-       num_rounds = context.run_config["num-server-rounds"]
+    def server_fn(context: Context):
+        # Read from config
+        num_rounds = context.run_config["num-server-rounds"]

-       # Get parameters to initialize global model
-       parameters = ndarrays_to_parameters(load_model().get_weights())
+        # Get parameters to initialize global model
+        parameters = ndarrays_to_parameters(load_model().get_weights())

-       # Define strategy
-       strategy = strategy = FedAvg(
-           fraction_fit=1.0,
-           fraction_evaluate=1.0,
-           min_available_clients=2,
-           initial_parameters=parameters,
-       )
-       config = ServerConfig(num_rounds=num_rounds)
+        # Define strategy
+        strategy = FedAvg(
+            fraction_fit=1.0,
+            fraction_evaluate=1.0,
+            min_available_clients=2,
+            initial_parameters=parameters,
+        )
+        config = ServerConfig(num_rounds=num_rounds)

-       return ServerAppComponents(strategy=strategy, config=config)
+        return ServerAppComponents(strategy=strategy, config=config)

-   # Create ServerApp
-   app = ServerApp(server_fn=server_fn)
-Congratulations! You've successfully built and run your first federated
-learning system.
+
+    # Create ServerApp
+    app = ServerApp(server_fn=server_fn)

-.. note::
+Congratulations! You've successfully built and run your first federated learning system.

-   Check the source code of the extended version of this tutorial in
-   |quickstart_tf_link|_ in the Flower GitHub repository.
+.. note::

-.. |quickstart_tf_link| replace::
+    Check the source code of the extended version of this tutorial in
+    |quickstart_tf_link|_ in the Flower GitHub repository.

-   :code:`examples/quickstart-tensorflow`
+.. |quickstart_tf_link| replace:: ``examples/quickstart-tensorflow``

 .. _quickstart_tf_link: https://github.com/adap/flower/blob/main/examples/quickstart-tensorflow

-****************
- Video tutorial
-****************
+Video tutorial
+--------------

 .. note::

-   The video shown below shows how to setup a TensorFlow + Flower
-   project using our previously recommended APIs. A new video tutorial
-   will be released that shows the new APIs (as the content above does)
+    The video below shows how to set up a TensorFlow + Flower project using our
+    previously recommended APIs. A new video tutorial will be released that shows the
+    new APIs (as the content above does).

 .. 
meta::
-   :description: Check out this Federated Learning quickstart tutorial for using Flower with TensorFlow to train a CNN model on CIFAR-10.
+    :description: Check out this Federated Learning quickstart tutorial for using Flower with TensorFlow to train a CNN model on CIFAR-10.

 .. youtube:: FGTc2TQq7VM
-   :width: 100%
+    :width: 100%
diff --git a/doc/source/tutorial-quickstart-xgboost.rst b/doc/source/tutorial-quickstart-xgboost.rst
index 34ad5f6e99c0..fe15227fdf11 100644
--- a/doc/source/tutorial-quickstart-xgboost.rst
+++ b/doc/source/tutorial-quickstart-xgboost.rst
@@ -1,63 +1,75 @@
 .. _quickstart-xgboost:

-
 Quickstart XGBoost
-=====================
+==================

 .. meta::
-    :description: Check out this Federated Learning quickstart tutorial for using Flower with XGBoost to train classification models on trees.
+    :description: Check out this Federated Learning quickstart tutorial for using Flower with XGBoost to train classification models on trees.

-.. youtube:: AY1vpXUpesc
-    :width: 100%
+.. youtube:: AY1vpXUpesc
+    :width: 100%

 Federated XGBoost
--------------------
+-----------------

-EXtreme Gradient Boosting (**XGBoost**) is a robust and efficient implementation of gradient-boosted decision tree (**GBDT**), that maximises the computational boundaries for boosted tree methods.
-It's primarily designed to enhance both the performance and computational speed of machine learning models.
-In XGBoost, trees are constructed concurrently, unlike the sequential approach taken by GBDT.
+EXtreme Gradient Boosting (**XGBoost**) is a robust and efficient implementation of
+gradient-boosted decision trees (**GBDT**) that maximises the computational boundaries
+for boosted tree methods. It's primarily designed to enhance both the performance and
+computational speed of machine learning models. In XGBoost, trees are constructed
+concurrently, unlike the sequential approach taken by GBDT.

-Often, for tabular data on medium-sized datasets with fewer than 10k training examples, XGBoost surpasses the results of deep learning techniques.
+Often, for tabular data on medium-sized datasets with fewer than 10k training examples,
+XGBoost surpasses the results of deep learning techniques.

 Why federated XGBoost?
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Indeed, as the demand for data privacy and decentralized learning grows, there's an increasing requirement to implement federated XGBoost systems for specialised applications, like survival analysis and financial fraud detection.
+~~~~~~~~~~~~~~~~~~~~~~

-Federated learning ensures that raw data remains on the local device, making it an attractive approach for sensitive domains where data security and privacy are paramount.
-Given the robustness and efficiency of XGBoost, combining it with federated learning offers a promising solution for these specific challenges.
+Indeed, as the demand for data privacy and decentralized learning grows, there's an
+increasing requirement to implement federated XGBoost systems for specialised
+applications, like survival analysis and financial fraud detection.

-In this tutorial we will learn how to train a federated XGBoost model on HIGGS dataset using Flower and :code:`xgboost` package.
-We use a simple example (`full code xgboost-quickstart `_) with two *clients* and one *server*
-to demonstrate how federated XGBoost works,
-and then we dive into a more complex example (`full code xgboost-comprehensive `_) to run various experiments. 
+Federated learning ensures that raw data remains on the local device, making it an
+attractive approach for sensitive domains where data security and privacy are paramount.
+Given the robustness and efficiency of XGBoost, combining it with federated learning
+offers a promising solution for these specific challenges.
+
+In this tutorial we will learn how to train a federated XGBoost model on the HIGGS
+dataset using Flower and the ``xgboost`` package. We use a simple example (`full code
+xgboost-quickstart
+`_) with two
+*clients* and one *server* to demonstrate how federated XGBoost works, and then we dive
+into a more complex example (`full code xgboost-comprehensive
+`_) to run
+various experiments.

 Environment Setup
--------------------
+-----------------

-First of all, it is recommended to create a virtual environment and run everything within a :doc:`virtualenv `.
+First of all, it is recommended to create a virtual environment and run everything
+within a :doc:`virtualenv `.

 We first need to install Flower and Flower Datasets. You can do this by running:

 .. code-block:: shell

-  $ pip install flwr flwr-datasets
+    $ pip install flwr flwr-datasets

-Since we want to use :code:`xgboost` package to build up XGBoost trees, let's go ahead and install :code:`xgboost`:
+Since we want to use the ``xgboost`` package to build XGBoost trees, let's go ahead and
+install ``xgboost``:

 .. code-block:: shell

-  $ pip install xgboost
-
+    $ pip install xgboost

 Flower Client
------------------
+-------------

-*Clients* are responsible for generating individual weight-updates for the model based on their local datasets.
-Now that we have all our dependencies installed, let's run a simple distributed training with two clients and one server.
+*Clients* are responsible for generating individual weight updates for the model based
+on their local datasets. Now that we have all our dependencies installed, let's run a
+simple distributed training with two clients and one server.

-In a file called :code:`client.py`, import xgboost, Flower, Flower Datasets and other related functions:
+In a file called ``client.py``, import xgboost, Flower, Flower Datasets and other
+related functions:

 .. code-block:: python

@@ -84,9 +96,10 @@ In a file called :code:`client.py`, import xgboost, Flower, Flower Datasets and
     from flwr_datasets.partitioner import IidPartitioner

 Dataset partition and hyper-parameter selection
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Prior to local training, we require loading the HIGGS dataset from Flower Datasets and conduct data partitioning for FL:
+Prior to local training, we need to load the HIGGS dataset from Flower Datasets and
+conduct data partitioning for FL:

 .. code-block:: python

@@ -99,8 +112,9 @@ Prior to local training, we require loading the HIGGS dataset from Flower Datase
         partition = fds.load_partition(partition_id=args.partition_id, split="train")
         partition.set_format("numpy")

-In this example, we split the dataset into 30 partitions with uniform distribution (:code:`IidPartitioner(num_partitions=30)`).
-Then, we load the partition for the given client based on :code:`partition_id`:
+In this example, we split the dataset into 30 partitions with uniform distribution
+(``IidPartitioner(num_partitions=30)``). Then, we load the partition for the given
+client based on ``partition_id``:

.. 
code-block:: python

@@ -118,7 +132,8 @@ Then, we load the partition for the given client based on :code:`partition_id`:
         partition = fds.load_partition(idx=args.partition_id, split="train")
         partition.set_format("numpy")

-After that, we do train/test splitting on the given partition (client's local data), and transform data format for :code:`xgboost` package.
+After that, we do train/test splitting on the given partition (client's local data), and
+transform the data format for the ``xgboost`` package.

 .. code-block:: python

@@ -131,7 +146,8 @@ After that, we do train/test splitting on the given partition (client's local da
         train_dmatrix = transform_dataset_to_dmatrix(train_data)
         valid_dmatrix = transform_dataset_to_dmatrix(valid_data)

-The functions of :code:`train_test_split` and :code:`transform_dataset_to_dmatrix` are defined as below:
+The ``train_test_split`` and ``transform_dataset_to_dmatrix`` functions are defined as
+follows:

 .. code-block:: python

@@ -171,40 +187,39 @@ Finally, we define the hyper-parameters used for XGBoost training.
             "tree_method": "hist",
         }

-The :code:`num_local_round` represents the number of iterations for local tree boost.
-We use CPU for the training in default.
-One can shift it to GPU by setting :code:`tree_method` to :code:`gpu_hist`.
-We use AUC as evaluation metric.
-
+The ``num_local_round`` represents the number of iterations for local tree boosting. We
+use the CPU for training by default. One can shift it to GPU by setting ``tree_method``
+to ``gpu_hist``. We use AUC as the evaluation metric.

 Flower client definition for XGBoost
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-After loading the dataset we define the Flower client.
-We follow the general rule to define :code:`XgbClient` class inherited from :code:`fl.client.Client`.
+After loading the dataset we define the Flower client. We follow the general rule of
+defining an ``XgbClient`` class that inherits from ``fl.client.Client``.

 .. code-block:: python

     class XgbClient(fl.client.Client):
-    def __init__(
-        self,
-        train_dmatrix,
-        valid_dmatrix,
-        num_train,
-        num_val,
-        num_local_round,
-        params,
-    ):
-        self.train_dmatrix = train_dmatrix
-        self.valid_dmatrix = valid_dmatrix
-        self.num_train = num_train
-        self.num_val = num_val
-        self.num_local_round = num_local_round
-        self.params = params
-
-All required parameters defined above are passed to :code:`XgbClient`'s constructor.
-
-Then, we override :code:`get_parameters`, :code:`fit` and :code:`evaluate` methods insides :code:`XgbClient` class as follows.
+        def __init__(
+            self,
+            train_dmatrix,
+            valid_dmatrix,
+            num_train,
+            num_val,
+            num_local_round,
+            params,
+        ):
+            self.train_dmatrix = train_dmatrix
+            self.valid_dmatrix = valid_dmatrix
+            self.num_train = num_train
+            self.num_val = num_val
+            self.num_local_round = num_local_round
+            self.params = params
+
+All required parameters defined above are passed to ``XgbClient``'s constructor.
+
+Then, we override the ``get_parameters``, ``fit`` and ``evaluate`` methods inside the
+``XgbClient`` class as follows.

 .. code-block:: python

@@ -218,9 +233,10 @@ Then, we override :code:`get_parameters`, :code:`fit` and :code:`evaluate` metho
                 parameters=Parameters(tensor_type="", tensors=[]),
             )

-Unlike neural network training, XGBoost trees are not started from a specified random weights.
-In this case, we do not use :code:`get_parameters` and :code:`set_parameters` to initialise model parameters for XGBoost. 
-As a result, let's return an empty tensor in :code:`get_parameters` when it is called by the server at the first round.
+Unlike neural network training, XGBoost trees are not started from specified random
+weights. In this case, we do not use ``get_parameters`` and ``set_parameters`` to
+initialise model parameters for XGBoost. As a result, let's return an empty tensor in
+``get_parameters`` when it is called by the server at the first round.

 .. code-block:: python

@@ -259,9 +275,10 @@ As a result, let's return an empty tensor in :code:`get_parameters` when it is c
                 metrics={},
             )

-In :code:`fit`, at the first round, we call :code:`xgb.train()` to build up the first set of trees.
-From the second round, we load the global model sent from server to new build Booster object,
-and then update model weights on local training data with function :code:`local_boost` as follows:
+In ``fit``, at the first round, we call ``xgb.train()`` to build up the first set of
+trees. From the second round, we load the global model sent from the server into a newly
+built Booster object, and then update the model weights on local training data with the
+``local_boost`` function as follows:

 .. code-block:: python

@@ -278,8 +295,8 @@ and then update model weights on local training data with function :code:`local_
             return bst

-Given :code:`num_local_round`, we update trees by calling :code:`bst_input.update` method.
-After training, the last :code:`N=num_local_round` trees will be extracted to send to the server.
+Given ``num_local_round``, we update trees by calling the ``bst_input.update`` method.
+After training, the last ``N=num_local_round`` trees will be extracted and sent to the
+server.

 .. code-block:: python

@@ -310,40 +327,42 @@ After training, the last :code:`N=num_local_round` trees will be extracted to se
                 metrics={"AUC": auc},
             )

-In :code:`evaluate`, after loading the global model, we call :code:`bst.eval_set` function to conduct evaluation on valid set.
-The AUC value will be returned.
+In ``evaluate``, after loading the global model, we call the ``bst.eval_set`` function
+to conduct evaluation on the validation set. The AUC value will be returned.

-Now, we can create an instance of our class :code:`XgbClient` and add one line to actually run this client:
+Now, we can create an instance of our class ``XgbClient`` and add one line to actually
+run this client:

 .. code-block:: python

-    fl.client.start_client(
-        server_address="127.0.0.1:8080",
-        client=XgbClient(
-            train_dmatrix,
-            valid_dmatrix,
-            num_train,
-            num_val,
-            num_local_round,
-            params,
-        ).to_client(),
-    )
-
-That's it for the client. We only have to implement :code:`Client` and call :code:`fl.client.start_client()`.
-The string :code:`"[::]:8080"` tells the client which server to connect to.
-In our case we can run the server and the client on the same machine, therefore we use
-:code:`"[::]:8080"`. If we run a truly federated workload with the server and
-clients running on different machines, all that needs to change is the
-:code:`server_address` we point the client at.
+    fl.client.start_client(
+        server_address="127.0.0.1:8080",
+        client=XgbClient(
+            train_dmatrix,
+            valid_dmatrix,
+            num_train,
+            num_val,
+            num_local_round,
+            params,
+        ).to_client(),
+    )

+That's it for the client. We only have to implement ``Client`` and call
+``fl.client.start_client()``. The string ``"127.0.0.1:8080"`` tells the client which
+server to connect to. In our case we can run the server and the client on the same
+machine, therefore we use ``"127.0.0.1:8080"``. 
If we run a truly federated workload with the server
+and clients running on different machines, all that needs to change is the
+``server_address`` we point the client at.

 Flower Server
------------------
+-------------

-These updates are then sent to the *server* which will aggregate them to produce a better model.
-Finally, the *server* sends this improved version of the model back to each *client* to finish a complete FL round.
+These updates are then sent to the *server* which will aggregate them to produce a
+better model. Finally, the *server* sends this improved version of the model back to
+each *client* to finish a complete FL round.

-In a file named :code:`server.py`, import Flower and FedXgbBagging from :code:`flwr.server.strategy`.
+In a file named ``server.py``, import Flower and FedXgbBagging from
+``flwr.server.strategy``.

 We first define a strategy for XGBoost bagging aggregation.

@@ -361,6 +380,7 @@ We first define a strategy for XGBoost bagging aggregation.
         on_fit_config_fn=config_func,
     )

+
 def evaluate_metrics_aggregation(eval_metrics):
     """Return an aggregated metric (AUC) for evaluation."""
     total_num = sum([num for num, _ in eval_metrics])
@@ -370,6 +390,7 @@ We first define a strategy for XGBoost bagging aggregation.
     metrics_aggregated = {"AUC": auc_aggregated}
     return metrics_aggregated

+
 def config_func(rnd: int) -> Dict[str, str]:
     """Return a configuration with global epochs."""
     config = {
@@ -377,9 +398,10 @@ We first define a strategy for XGBoost bagging aggregation.
     }
     return config

-We use two clients for this example.
-An :code:`evaluate_metrics_aggregation` function is defined to collect and wighted average the AUC values from clients.
-The :code:`config_func` function is to return the current FL round number to client's :code:`fit()` and :code:`evaluate()` methods.
+We use two clients for this example. An ``evaluate_metrics_aggregation`` function is
+defined to collect and compute a weighted average of the AUC values from clients. The
+``config_func`` function returns the current FL round number to the client's ``fit()``
+and ``evaluate()`` methods.

 Then, we start the server:

@@ -393,12 +415,13 @@ Then, we start the server:
     )

 Tree-based bagging aggregation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 You must be curious about how bagging aggregation works. Let's look into the details.

-In file :code:`flwr.server.strategy.fedxgb_bagging.py`, we define :code:`FedXgbBagging` inherited from :code:`flwr.server.strategy.FedAvg`.
-Then, we override the :code:`aggregate_fit`, :code:`aggregate_evaluate` and :code:`evaluate` methods as follows:
+In the file ``flwr.server.strategy.fedxgb_bagging.py``, we define ``FedXgbBagging``,
+which inherits from ``flwr.server.strategy.FedAvg``. Then, we override the
+``aggregate_fit``, ``aggregate_evaluate`` and ``evaluate`` methods as follows:

 .. code-block:: python

@@ -493,7 +516,8 @@ Then, we override the :code:`aggregate_fit`, :code:`aggregate_evaluate` and :cod
             loss, metrics = eval_res
             return loss, metrics

-In :code:`aggregate_fit`, we sequentially aggregate the clients' XGBoost trees by calling :code:`aggregate()` function:
+In ``aggregate_fit``, we sequentially aggregate the clients' XGBoost trees by calling
+the ``aggregate()`` function:

 .. 

 Then, we start the server:

@@ -393,12 +415,13 @@ Then, we start the server:
     )

 Tree-based bagging aggregation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 You must be curious about how bagging aggregation works. Let's look into the details.

-In file :code:`flwr.server.strategy.fedxgb_bagging.py`, we define :code:`FedXgbBagging` inherited from :code:`flwr.server.strategy.FedAvg`.
-Then, we override the :code:`aggregate_fit`, :code:`aggregate_evaluate` and :code:`evaluate` methods as follows:
+In file ``flwr.server.strategy.fedxgb_bagging.py``, we define ``FedXgbBagging``, which
+inherits from ``flwr.server.strategy.FedAvg``. Then, we override the ``aggregate_fit``,
+``aggregate_evaluate`` and ``evaluate`` methods as follows:

 .. code-block:: python

@@ -493,7 +516,8 @@ Then, we override the :code:`aggregate_fit`, :code:`aggregate_evaluate` and :cod
         loss, metrics = eval_res
         return loss, metrics

-In :code:`aggregate_fit`, we sequentially aggregate the clients' XGBoost trees by calling :code:`aggregate()` function:
+In ``aggregate_fit``, we sequentially aggregate the clients' XGBoost trees by calling
+the ``aggregate()`` function:

 .. code-block:: python

@@ -552,28 +576,27 @@ In :code:`aggregate_fit`, we sequentially aggregate the clients' XGBoost trees b
     )
         return tree_num, paral_tree_num

-In this function, we first fetch the number of trees and the number of parallel trees for the current and previous model
-by calling :code:`_get_tree_nums`.
-Then, the fetched information will be aggregated.
-After that, the trees (containing model weights) are aggregated to generate a new tree model.
-
-After traversal of all clients' models, a new global model is generated,
-followed by the serialisation, and sending back to each client.
+In this function, we first fetch the number of trees and the number of parallel trees
+for the current and previous model by calling ``_get_tree_nums``. Then, the fetched
+information will be aggregated. After that, the trees (containing model weights) are
+aggregated to generate a new tree model.
+
+After traversing all clients' models, a new global model is generated. It is then
+serialised and sent back to each client.

 Launch Federated XGBoost!
--------------------------------
+-------------------------

-With both client and server ready, we can now run everything and see federated
-learning in action. FL systems usually have a server and multiple clients. We
-therefore have to start the server first:
+With both client and server ready, we can now run everything and see federated learning
+in action. FL systems usually have a server and multiple clients. We therefore have to
+start the server first:

 .. code-block:: shell

     $ python3 server.py

-Once the server is running we can start the clients in different terminals.
-Open a new terminal and start the first client:
+Once the server is running, we can start the clients in different terminals. Open a new
+terminal and start the first client:

 .. code-block:: shell

@@ -585,8 +608,8 @@ Open another terminal and start the second client:

     $ python3 client.py --partition-id=1

-Each client will have its own dataset.
-You should now see how the training does in the very first terminal (the one that started the server):
+Each client will have its own dataset. You should now see how the training progresses
+in the very first terminal (the one that started the server):

 .. code-block:: shell

@@ -629,192 +652,197 @@ You should now see how the training does in the very first terminal (the one tha
     INFO :
     INFO :      [SUMMARY]
     INFO :      Run finished 5 round(s) in 1.67s
-    INFO :          History (loss, distributed):
-    INFO :                  round 1: 0
-    INFO :                  round 2: 0
-    INFO :                  round 3: 0
-    INFO :                  round 4: 0
-    INFO :                  round 5: 0
-    INFO :          History (metrics, distributed, evaluate):
-    INFO :          {'AUC': [(1, 0.76755), (2, 0.775), (3, 0.77935), (4, 0.7836), (5, 0.7872)]}
-
-Congratulations!
-You've successfully built and run your first federated XGBoost system.
-The AUC values can be checked in :code:`metrics_distributed`.
-One can see that the average AUC increases over FL rounds.
-
-The full `source code `_ for this example can be found in :code:`examples/xgboost-quickstart`.
-
+    INFO :          History (loss, distributed):
+    INFO :                  round 1: 0
+    INFO :                  round 2: 0
+    INFO :                  round 3: 0
+    INFO :                  round 4: 0
+    INFO :                  round 5: 0
+    INFO :          History (metrics, distributed, evaluate):
+    INFO :          {'AUC': [(1, 0.76755), (2, 0.775), (3, 0.77935), (4, 0.7836), (5, 0.7872)]}
+
+Congratulations! You've successfully built and run your first federated XGBoost system.
+The AUC values can be checked in ``metrics_distributed``. One can see that the average
+AUC increases over FL rounds.
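+
+The same values are also available programmatically if you keep a reference to the
+``History`` object returned by ``fl.server.start_server``. A minimal sketch, in which
+the address and config are illustrative and ``strategy`` is the one defined above:
+
+.. code-block:: python
+
+    history = fl.server.start_server(
+        server_address="0.0.0.0:8080",
+        config=fl.server.ServerConfig(num_rounds=5),
+        strategy=strategy,
+    )
+
+    # ``metrics_distributed`` maps each metric name to a list of (round, value) pairs
+    for rnd, auc in history.metrics_distributed["AUC"]:
+        print(f"Round {rnd}: AUC = {auc}")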
+
+The full `source code
+`_ for this
+example can be found in ``examples/xgboost-quickstart``.

 Comprehensive Federated XGBoost
------------------------------------
+-------------------------------

-Now that you have known how federated XGBoost work with Flower, it's time to run some more comprehensive experiments by customising the experimental settings.
-In the xgboost-comprehensive example (`full code `_),
-we provide more options to define various experimental setups, including aggregation strategies, data partitioning and centralised/distributed evaluation.
-We also support :doc:`Flower simulation ` making it easy to simulate large client cohorts in a resource-aware manner.
-Let's take a look!
+Now that you know how federated XGBoost works with Flower, it's time to run some more
+comprehensive experiments by customising the experimental settings. In the
+xgboost-comprehensive example (`full code
+`_), we provide
+more options to define various experimental setups, including aggregation strategies,
+data partitioning and centralised/distributed evaluation. We also support :doc:`Flower
+simulation `, making it easy to simulate large client cohorts in
+a resource-aware manner. Let's take a look!

 Cyclic training
-~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~

-In addition to bagging aggregation, we offer a cyclic training scheme, which performs FL in a client-by-client fashion.
-Instead of aggregating multiple clients, there is only one single client participating in the training per round in the cyclic training scenario.
-The trained local XGBoost trees will be passed to the next client as an initialised model for next round's boosting.
+In addition to bagging aggregation, we offer a cyclic training scheme, which performs FL
+in a client-by-client fashion. Instead of aggregating multiple clients, only a single
+client participates in the training per round in the cyclic training scenario. The
+trained local XGBoost trees will be passed to the next client as the initialised model
+for the next round's boosting.
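+
+To make the selection rule concrete before diving into the code: rounds simply cycle
+through the connected clients in round-robin order. A tiny illustrative sketch of the
+formula used by the strategy below:
+
+.. code-block:: python
+
+    num_clients = 2
+    for server_round in range(1, 6):
+        # Same formula as in ``configure_fit``/``configure_evaluate`` below
+        print(server_round, (server_round - 1) % num_clients)
+    # Rounds 1..5 select clients 0, 1, 0, 1, 0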

-To do this, we first customise a :code:`ClientManager` in :code:`server_utils.py`:
+To do this, we first customise a ``ClientManager`` in ``server_utils.py``:

 .. code-block:: python

-    class CyclicClientManager(SimpleClientManager):
-        """Provides a cyclic client selection rule."""
-
-        def sample(
-            self,
-            num_clients: int,
-            min_num_clients: Optional[int] = None,
-            criterion: Optional[Criterion] = None,
-        ) -> List[ClientProxy]:
-            """Sample a number of Flower ClientProxy instances."""
-
-            # Block until at least num_clients are connected.
-            if min_num_clients is None:
-                min_num_clients = num_clients
-            self.wait_for(min_num_clients)
-
-            # Sample clients which meet the criterion
-            available_cids = list(self.clients)
-            if criterion is not None:
-                available_cids = [
-                    cid for cid in available_cids if criterion.select(self.clients[cid])
-                ]
-
-            if num_clients > len(available_cids):
-                log(
-                    INFO,
-                    "Sampling failed: number of available clients"
-                    " (%s) is less than number of requested clients (%s).",
-                    len(available_cids),
-                    num_clients,
-                )
-                return []
-
-            # Return all available clients
-            return [self.clients[cid] for cid in available_cids]
-
-The customised :code:`ClientManager` samples all available clients in each FL round based on the order of connection to the server.
-Then, we define a new strategy :code:`FedXgbCyclic` in :code:`flwr.server.strategy.fedxgb_cyclic.py`,
-in order to sequentially select only one client in given round and pass the received model to next client.
+    class CyclicClientManager(SimpleClientManager):
+        """Provides a cyclic client selection rule."""
+
+        def sample(
+            self,
+            num_clients: int,
+            min_num_clients: Optional[int] = None,
+            criterion: Optional[Criterion] = None,
+        ) -> List[ClientProxy]:
+            """Sample a number of Flower ClientProxy instances."""
+
+            # Block until at least num_clients are connected.
+            if min_num_clients is None:
+                min_num_clients = num_clients
+            self.wait_for(min_num_clients)
+
+            # Sample clients which meet the criterion
+            available_cids = list(self.clients)
+            if criterion is not None:
+                available_cids = [
+                    cid for cid in available_cids if criterion.select(self.clients[cid])
+                ]
+
+            if num_clients > len(available_cids):
+                log(
+                    INFO,
+                    "Sampling failed: number of available clients"
+                    " (%s) is less than number of requested clients (%s).",
+                    len(available_cids),
+                    num_clients,
+                )
+                return []
+
+            # Return all available clients
+            return [self.clients[cid] for cid in available_cids]
+
+The customised ``ClientManager`` samples all available clients in each FL round based on
+the order of connection to the server. Then, we define a new strategy ``FedXgbCyclic``
+in ``flwr.server.strategy.fedxgb_cyclic.py``, in order to sequentially select only one
+client in a given round and pass the received model to the next client.

 .. code-block:: python

-    class FedXgbCyclic(FedAvg):
-        """Configurable FedXgbCyclic strategy implementation."""
-
-        # pylint: disable=too-many-arguments,too-many-instance-attributes, line-too-long
-        def __init__(
-            self,
-            **kwargs: Any,
-        ):
-            self.global_model: Optional[bytes] = None
-            super().__init__(**kwargs)
-
-        def aggregate_fit(
-            self,
-            server_round: int,
-            results: List[Tuple[ClientProxy, FitRes]],
-            failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
-        ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
-            """Aggregate fit results using bagging."""
-            if not results:
-                return None, {}
-            # Do not aggregate if there are failures and failures are not accepted
-            if not self.accept_failures and failures:
-                return None, {}
-
-            # Fetch the client model from last round as global model
-            for _, fit_res in results:
-                update = fit_res.parameters.tensors
-                for bst in update:
-                    self.global_model = bst
-
-            return (
-                Parameters(tensor_type="", tensors=[cast(bytes, self.global_model)]),
-                {},
-            )
-
-Unlike the original :code:`FedAvg`, we don't perform aggregation here.
-Instead, we just make a copy of the received client model as global model by overriding :code:`aggregate_fit`.
+    class FedXgbCyclic(FedAvg):
+        """Configurable FedXgbCyclic strategy implementation."""
+
+        # pylint: disable=too-many-arguments,too-many-instance-attributes, line-too-long
+        def __init__(
+            self,
+            **kwargs: Any,
+        ):
+            self.global_model: Optional[bytes] = None
+            super().__init__(**kwargs)
+
+        def aggregate_fit(
+            self,
+            server_round: int,
+            results: List[Tuple[ClientProxy, FitRes]],
+            failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
+        ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
+            """Aggregate fit results using bagging."""
+            if not results:
+                return None, {}
+            # Do not aggregate if there are failures and failures are not accepted
+            if not self.accept_failures and failures:
+                return None, {}
+
+            # Fetch the client model from last round as global model
+            for _, fit_res in results:
+                update = fit_res.parameters.tensors
+                for bst in update:
+                    self.global_model = bst
+
+            return (
+                Parameters(tensor_type="", tensors=[cast(bytes, self.global_model)]),
+                {},
+            )
+
+Unlike the original ``FedAvg``, we don't perform aggregation here. Instead, we simply
+keep the received client model as the global model by overriding ``aggregate_fit``.
+
+Also, the customised ``configure_fit`` and ``configure_evaluate`` methods ensure that
+the clients are selected sequentially in a given FL round:

 .. code-block:: python

-    def configure_fit(
-        self, server_round: int, parameters: Parameters, client_manager: ClientManager
-    ) -> List[Tuple[ClientProxy, FitIns]]:
-        """Configure the next round of training."""
-        config = {}
-        if self.on_fit_config_fn is not None:
-            # Custom fit config function provided
-            config = self.on_fit_config_fn(server_round)
-        fit_ins = FitIns(parameters, config)
-
-        # Sample clients
-        sample_size, min_num_clients = self.num_fit_clients(
-            client_manager.num_available()
-        )
-        clients = client_manager.sample(
-            num_clients=sample_size,
-            min_num_clients=min_num_clients,
-        )
-
-        # Sample the clients sequentially given server_round
-        sampled_idx = (server_round - 1) % len(clients)
-        sampled_clients = [clients[sampled_idx]]
-
-        # Return client/config pairs
-        return [(client, fit_ins) for client in sampled_clients]
-
-    def configure_evaluate(
-        self, server_round: int, parameters: Parameters, client_manager: ClientManager
-    ) -> List[Tuple[ClientProxy, EvaluateIns]]:
-        """Configure the next round of evaluation."""
-        # Do not configure federated evaluation if fraction eval is 0.
-        if self.fraction_evaluate == 0.0:
-            return []
-
-        # Parameters and config
-        config = {}
-        if self.on_evaluate_config_fn is not None:
-            # Custom evaluation config function provided
-            config = self.on_evaluate_config_fn(server_round)
-        evaluate_ins = EvaluateIns(parameters, config)
-
-        # Sample clients
-        sample_size, min_num_clients = self.num_evaluation_clients(
-            client_manager.num_available()
-        )
-        clients = client_manager.sample(
-            num_clients=sample_size,
-            min_num_clients=min_num_clients,
-        )
-
-        # Sample the clients sequentially given server_round
-        sampled_idx = (server_round - 1) % len(clients)
-        sampled_clients = [clients[sampled_idx]]
-
-        # Return client/config pairs
-        return [(client, evaluate_ins) for client in sampled_clients]
+    def configure_fit(
+        self, server_round: int, parameters: Parameters, client_manager: ClientManager
+    ) -> List[Tuple[ClientProxy, FitIns]]:
+        """Configure the next round of training."""
+        config = {}
+        if self.on_fit_config_fn is not None:
+            # Custom fit config function provided
+            config = self.on_fit_config_fn(server_round)
+        fit_ins = FitIns(parameters, config)
+
+        # Sample clients
+        sample_size, min_num_clients = self.num_fit_clients(client_manager.num_available())
+        clients = client_manager.sample(
+            num_clients=sample_size,
+            min_num_clients=min_num_clients,
+        )
+
+        # Sample the clients sequentially given server_round
+        sampled_idx = (server_round - 1) % len(clients)
+        sampled_clients = [clients[sampled_idx]]
+
+        # Return client/config pairs
+        return [(client, fit_ins) for client in sampled_clients]
+
+    def configure_evaluate(
+        self, server_round: int, parameters: Parameters, client_manager: ClientManager
+    ) -> List[Tuple[ClientProxy, EvaluateIns]]:
+        """Configure the next round of evaluation."""
+        # Do not configure federated evaluation if fraction eval is 0.
+        if self.fraction_evaluate == 0.0:
+            return []
+
+        # Parameters and config
+        config = {}
+        if self.on_evaluate_config_fn is not None:
+            # Custom evaluation config function provided
+            config = self.on_evaluate_config_fn(server_round)
+        evaluate_ins = EvaluateIns(parameters, config)
+
+        # Sample clients
+        sample_size, min_num_clients = self.num_evaluation_clients(
+            client_manager.num_available()
+        )
+        clients = client_manager.sample(
+            num_clients=sample_size,
+            min_num_clients=min_num_clients,
+        )
+
+        # Sample the clients sequentially given server_round
+        sampled_idx = (server_round - 1) % len(clients)
+        sampled_clients = [clients[sampled_idx]]
+
+        # Return client/config pairs
+        return [(client, evaluate_ins) for client in sampled_clients]

 Customised data partitioning
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-In :code:`dataset.py`, we have a function :code:`instantiate_partitioner` to instantiate the data partitioner
-based on the given :code:`num_partitions` and :code:`partitioner_type`.
-Currently, we provide four supported partitioner type to simulate the uniformity/non-uniformity in data quantity (uniform, linear, square, exponential).
+In ``dataset.py``, we have a function ``instantiate_partitioner`` to instantiate the
+data partitioner based on the given ``num_partitions`` and ``partitioner_type``.
+Currently, we provide four supported partitioner types to simulate the
+uniformity/non-uniformity in data quantity (uniform, linear, square, exponential).

 .. code-block:: python

@@ -841,11 +869,10 @@ Currently, we provide four supported partitioner type to simulate the uniformity
     )
     return partitioner

-
 Customised centralised/distributed evaluation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-To facilitate centralised evaluation, we define a function in :code:`server_utils.py`:
+To facilitate centralised evaluation, we define a function in ``server_utils.py``:

 .. code-block:: python

@@ -877,105 +904,112 @@ To facilitate centralised evaluation, we define a function in :code:`server_util
         return evaluate_fn

-This function returns a evaluation function which instantiates a :code:`Booster` object and loads the global model weights to it.
-The evaluation is conducted by calling :code:`eval_set()` method, and the tested AUC value is reported.
+This function returns an evaluation function, which instantiates a ``Booster`` object
+and loads the global model weights into it. The evaluation is conducted by calling the
+``eval_set()`` method, and the tested AUC value is reported.

-As for distributed evaluation on the clients, it's same as the quick-start example by
-overriding the :code:`evaluate()` method insides the :code:`XgbClient` class in :code:`client_utils.py`.
+As for distributed evaluation on the clients, it's the same as the quick-start example:
+we override the ``evaluate()`` method inside the ``XgbClient`` class in
+``client_utils.py``.

 Flower simulation
-~~~~~~~~~~~~~~~~~~~~
-We also provide an example code (:code:`sim.py`) to use the simulation capabilities of Flower to simulate federated XGBoost training on either a single machine or a cluster of machines.
-
-.. code-block:: python
+~~~~~~~~~~~~~~~~~

-    from logging import INFO
-    import xgboost as xgb
-    from tqdm import tqdm
-
-    import flwr as fl
-    from flwr_datasets import FederatedDataset
-    from flwr.common.logger import log
-    from flwr.server.strategy import FedXgbBagging, FedXgbCyclic
-
-    from dataset import (
-        instantiate_partitioner,
-        train_test_split,
-        transform_dataset_to_dmatrix,
-        separate_xy,
-        resplit,
-    )
-    from utils import (
-        sim_args_parser,
-        NUM_LOCAL_ROUND,
-        BST_PARAMS,
-    )
-    from server_utils import (
-        eval_config,
-        fit_config,
-        evaluate_metrics_aggregation,
-        get_evaluate_fn,
-        CyclicClientManager,
-    )
-    from client_utils import XgbClient
-
-After importing all required packages, we define a :code:`main()` function to perform the simulation process:
+We also provide example code (``sim.py``) to use the simulation capabilities of Flower
+to simulate federated XGBoost training on either a single machine or a cluster of
+machines.

.. 
code-block:: python - def main(): - # Parse arguments for experimental settings - args = sim_args_parser() + from logging import INFO + import xgboost as xgb + from tqdm import tqdm - # Load (HIGGS) dataset and conduct partitioning - partitioner = instantiate_partitioner( - partitioner_type=args.partitioner_type, num_partitions=args.pool_size + import flwr as fl + from flwr_datasets import FederatedDataset + from flwr.common.logger import log + from flwr.server.strategy import FedXgbBagging, FedXgbCyclic + + from dataset import ( + instantiate_partitioner, + train_test_split, + transform_dataset_to_dmatrix, + separate_xy, + resplit, + ) + from utils import ( + sim_args_parser, + NUM_LOCAL_ROUND, + BST_PARAMS, ) - fds = FederatedDataset( - dataset="jxie/higgs", - partitioners={"train": partitioner}, - resplitter=resplit, + from server_utils import ( + eval_config, + fit_config, + evaluate_metrics_aggregation, + get_evaluate_fn, + CyclicClientManager, ) + from client_utils import XgbClient - # Load centralised test set - if args.centralised_eval or args.centralised_eval_client: - log(INFO, "Loading centralised test set...") - test_data = fds.load_split("test") - test_data.set_format("numpy") - num_test = test_data.shape[0] - test_dmatrix = transform_dataset_to_dmatrix(test_data) - - # Load partitions and reformat data to DMatrix for xgboost - log(INFO, "Loading client local partitions...") - train_data_list = [] - valid_data_list = [] - - # Load and process all client partitions. This upfront cost is amortized soon - # after the simulation begins since clients wont need to preprocess their partition. - for node_id in tqdm(range(args.pool_size), desc="Extracting client partition"): - # Extract partition for client with node_id - partition = fds.load_partition(node_id=node_id, split="train") - partition.set_format("numpy") - - if args.centralised_eval_client: - # Use centralised test set for evaluation - train_data = partition - num_train = train_data.shape[0] - x_test, y_test = separate_xy(test_data) - valid_data_list.append(((x_test, y_test), num_test)) - else: - # Train/test splitting - train_data, valid_data, num_train, num_val = train_test_split( - partition, test_fraction=args.test_fraction, seed=args.seed - ) - x_valid, y_valid = separate_xy(valid_data) - valid_data_list.append(((x_valid, y_valid), num_val)) +After importing all required packages, we define a ``main()`` function to perform the +simulation process: - x_train, y_train = separate_xy(train_data) - train_data_list.append(((x_train, y_train), num_train)) +.. code-block:: python + + def main(): + # Parse arguments for experimental settings + args = sim_args_parser() -We first load the dataset and perform data partitioning, and the pre-processed data is stored in a :code:`list`. -After the simulation begins, the clients won't need to pre-process their partitions again. 
+ # Load (HIGGS) dataset and conduct partitioning + partitioner = instantiate_partitioner( + partitioner_type=args.partitioner_type, num_partitions=args.pool_size + ) + fds = FederatedDataset( + dataset="jxie/higgs", + partitioners={"train": partitioner}, + resplitter=resplit, + ) + + # Load centralised test set + if args.centralised_eval or args.centralised_eval_client: + log(INFO, "Loading centralised test set...") + test_data = fds.load_split("test") + test_data.set_format("numpy") + num_test = test_data.shape[0] + test_dmatrix = transform_dataset_to_dmatrix(test_data) + + # Load partitions and reformat data to DMatrix for xgboost + log(INFO, "Loading client local partitions...") + train_data_list = [] + valid_data_list = [] + + # Load and process all client partitions. This upfront cost is amortized soon + # after the simulation begins since clients wont need to preprocess their partition. + for node_id in tqdm(range(args.pool_size), desc="Extracting client partition"): + # Extract partition for client with node_id + partition = fds.load_partition(node_id=node_id, split="train") + partition.set_format("numpy") + + if args.centralised_eval_client: + # Use centralised test set for evaluation + train_data = partition + num_train = train_data.shape[0] + x_test, y_test = separate_xy(test_data) + valid_data_list.append(((x_test, y_test), num_test)) + else: + # Train/test splitting + train_data, valid_data, num_train, num_val = train_test_split( + partition, test_fraction=args.test_fraction, seed=args.seed + ) + x_valid, y_valid = separate_xy(valid_data) + valid_data_list.append(((x_valid, y_valid), num_val)) + + x_train, y_train = separate_xy(train_data) + train_data_list.append(((x_train, y_train), num_train)) + +We first load the dataset and perform data partitioning, and the pre-processed data is +stored in a ``list``. After the simulation begins, the clients won't need to pre-process +their partitions again. Then, we define the strategies and other hyper-parameters: @@ -985,21 +1019,21 @@ Then, we define the strategies and other hyper-parameters: if args.train_method == "bagging": # Bagging training strategy = FedXgbBagging( - evaluate_function=get_evaluate_fn(test_dmatrix) - if args.centralised_eval - else None, + evaluate_function=( + get_evaluate_fn(test_dmatrix) if args.centralised_eval else None + ), fraction_fit=(float(args.num_clients_per_round) / args.pool_size), min_fit_clients=args.num_clients_per_round, min_available_clients=args.pool_size, - min_evaluate_clients=args.num_evaluate_clients - if not args.centralised_eval - else 0, + min_evaluate_clients=( + args.num_evaluate_clients if not args.centralised_eval else 0 + ), fraction_evaluate=1.0 if not args.centralised_eval else 0.0, on_evaluate_config_fn=eval_config, on_fit_config_fn=fit_config, - evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation - if not args.centralised_eval - else None, + evaluate_metrics_aggregation_fn=( + evaluate_metrics_aggregation if not args.centralised_eval else None + ), ) else: # Cyclic training @@ -1028,7 +1062,7 @@ Then, we define the strategies and other hyper-parameters: new_lr = params["eta"] / args.pool_size params.update({"eta": new_lr}) -After that, we start the simulation by calling :code:`fl.simulation.start_simulation`: +After that, we start the simulation by calling ``fl.simulation.start_simulation``: .. 
code-block:: python

@@ -1048,53 +1082,52 @@ After that, we start the simulation by calling :code:`fl.simulation.start_simula
         client_manager=CyclicClientManager() if args.train_method == "cyclic" else None,
     )

-One of key parameters for :code:`start_simulation` is :code:`client_fn` which returns a function to construct a client.
-We define it as follows:
+One of the key parameters for ``start_simulation`` is ``client_fn``, which returns a
+function to construct a client. We define it as follows:

 .. code-block:: python

-    def get_client_fn(
-        train_data_list, valid_data_list, train_method, params, num_local_round
-    ):
-        """Return a function to construct a client.
-
-        The VirtualClientEngine will execute this function whenever a client is sampled by
-        the strategy to participate.
-        """
-
-        def client_fn(cid: str) -> fl.client.Client:
-            """Construct a FlowerClient with its own dataset partition."""
-            x_train, y_train = train_data_list[int(cid)][0]
-            x_valid, y_valid = valid_data_list[int(cid)][0]
-
-            # Reformat data to DMatrix
-            train_dmatrix = xgb.DMatrix(x_train, label=y_train)
-            valid_dmatrix = xgb.DMatrix(x_valid, label=y_valid)
-
-            # Fetch the number of examples
-            num_train = train_data_list[int(cid)][1]
-            num_val = valid_data_list[int(cid)][1]
-
-            # Create and return client
-            return XgbClient(
-                train_dmatrix,
-                valid_dmatrix,
-                num_train,
-                num_val,
-                num_local_round,
-                params,
-                train_method,
-            )
-
-        return client_fn
-
+    def get_client_fn(
+        train_data_list, valid_data_list, train_method, params, num_local_round
+    ):
+        """Return a function to construct a client.
+
+        The VirtualClientEngine will execute this function whenever a client is sampled by
+        the strategy to participate.
+        """
+
+        def client_fn(cid: str) -> fl.client.Client:
+            """Construct a FlowerClient with its own dataset partition."""
+            x_train, y_train = train_data_list[int(cid)][0]
+            x_valid, y_valid = valid_data_list[int(cid)][0]
+
+            # Reformat data to DMatrix
+            train_dmatrix = xgb.DMatrix(x_train, label=y_train)
+            valid_dmatrix = xgb.DMatrix(x_valid, label=y_valid)
+
+            # Fetch the number of examples
+            num_train = train_data_list[int(cid)][1]
+            num_val = valid_data_list[int(cid)][1]
+
+            # Create and return client
+            return XgbClient(
+                train_dmatrix,
+                valid_dmatrix,
+                num_train,
+                num_val,
+                num_local_round,
+                params,
+                train_method,
+            )
+
+        return client_fn
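+
+To sanity-check what ``client_fn`` produces, you can construct a single client outside
+the simulation. A small sketch, assuming ``train_data_list`` and ``valid_data_list``
+were built as shown earlier:
+
+.. code-block:: python
+
+    client_fn = get_client_fn(
+        train_data_list, valid_data_list, "bagging", BST_PARAMS, NUM_LOCAL_ROUND
+    )
+    client = client_fn("0")  # an XgbClient backed by partition 0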

 Arguments parser
-~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~

-In :code:`utils.py`, we define the arguments parsers for clients, server and simulation, allowing users to specify different experimental settings.
-Let's first see the sever side:
+In ``utils.py``, we define the argument parsers for clients, server and simulation,
+allowing users to specify different experimental settings. Let's first see the server
+side:

 .. code-block:: python

@@ -1102,190 +1135,192 @@ Let's first see the sever side:

 def server_args_parser():
-    """Parse arguments to define experimental settings on server side."""
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--train-method",
-        default="bagging",
-        type=str,
-        choices=["bagging", "cyclic"],
-        help="Training methods selected from bagging aggregation or cyclic training.",
-    )
-    parser.add_argument(
-        "--pool-size", default=2, type=int, help="Number of total clients."
-    )
-    parser.add_argument(
-        "--num-rounds", default=5, type=int, help="Number of FL rounds."
-    )
-    parser.add_argument(
-        "--num-clients-per-round",
-        default=2,
-        type=int,
-        help="Number of clients participate in training each round.",
-    )
-    parser.add_argument(
-        "--num-evaluate-clients",
-        default=2,
-        type=int,
-        help="Number of clients selected for evaluation.",
-    )
-    parser.add_argument(
-        "--centralised-eval",
-        action="store_true",
-        help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
-    )
-
-    args = parser.parse_args()
-    return args
-
-This allows user to specify training strategies / the number of total clients / FL rounds / participating clients / clients for evaluation,
-and evaluation fashion. Note that with :code:`--centralised-eval`, the sever will do centralised evaluation
-and all functionalities for client evaluation will be disabled.
+    """Parse arguments to define experimental settings on server side."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--train-method",
+        default="bagging",
+        type=str,
+        choices=["bagging", "cyclic"],
+        help="Training methods selected from bagging aggregation or cyclic training.",
+    )
+    parser.add_argument(
+        "--pool-size", default=2, type=int, help="Number of total clients."
+    )
+    parser.add_argument(
+        "--num-rounds", default=5, type=int, help="Number of FL rounds."
+    )
+    parser.add_argument(
+        "--num-clients-per-round",
+        default=2,
+        type=int,
+        help="Number of clients participating in training each round.",
+    )
+    parser.add_argument(
+        "--num-evaluate-clients",
+        default=2,
+        type=int,
+        help="Number of clients selected for evaluation.",
+    )
+    parser.add_argument(
+        "--centralised-eval",
+        action="store_true",
+        help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
+    )
+
+    args = parser.parse_args()
+    return args
+
+This allows users to specify the training strategy, the number of total clients, the
+number of FL rounds, the participating clients per round, the clients selected for
+evaluation, and the evaluation mode. Note that with ``--centralised-eval``, the server
+will do centralised evaluation and all functionalities for client evaluation will be
+disabled.
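+
+For instance, a bagging run with 5 total clients for 30 rounds could be launched as
+follows (the flag values are illustrative):
+
+.. code-block:: shell
+
+    $ python3 server.py --train-method=bagging --pool-size=5 --num-rounds=30 --num-clients-per-round=5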

 Then, the argument parser on client side:

 .. code-block:: python

 def client_args_parser():
-    """Parse arguments to define experimental settings on client side."""
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        "--train-method",
-        default="bagging",
-        type=str,
-        choices=["bagging", "cyclic"],
-        help="Training methods selected from bagging aggregation or cyclic training.",
-    )
-    parser.add_argument(
-        "--num-partitions", default=10, type=int, help="Number of partitions."
-    )
-    parser.add_argument(
-        "--partitioner-type",
-        default="uniform",
-        type=str,
-        choices=["uniform", "linear", "square", "exponential"],
-        help="Partitioner types.",
-    )
-    parser.add_argument(
-        "--node-id",
-        default=0,
-        type=int,
-        help="Node ID used for the current client.",
-    )
-    parser.add_argument(
-        "--seed", default=42, type=int, help="Seed used for train/test splitting."
-    )
-    parser.add_argument(
-        "--test-fraction",
-        default=0.2,
-        type=float,
-        help="Test fraction for train/test splitting.",
-    )
-    parser.add_argument(
-        "--centralised-eval",
-        action="store_true",
-        help="Conduct evaluation on centralised test set (True), or on hold-out data (False).",
-    )
-    parser.add_argument(
-        "--scaled-lr",
-        action="store_true",
-        help="Perform scaled learning rate based on the number of clients (True).",
-    )
-
-    args = parser.parse_args()
-    return args
-
-This defines various options for client data partitioning.
-Besides, clients also have an option to conduct evaluation on centralised test set by setting :code:`--centralised-eval`,
-as well as an option to perform scaled learning rate based on the number of clients by setting :code:`--scaled-lr`.
+    """Parse arguments to define experimental settings on client side."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--train-method",
+        default="bagging",
+        type=str,
+        choices=["bagging", "cyclic"],
+        help="Training methods selected from bagging aggregation or cyclic training.",
+    )
+    parser.add_argument(
+        "--num-partitions", default=10, type=int, help="Number of partitions."
+    )
+    parser.add_argument(
+        "--partitioner-type",
+        default="uniform",
+        type=str,
+        choices=["uniform", "linear", "square", "exponential"],
+        help="Partitioner types.",
+    )
+    parser.add_argument(
+        "--node-id",
+        default=0,
+        type=int,
+        help="Node ID used for the current client.",
+    )
+    parser.add_argument(
+        "--seed", default=42, type=int, help="Seed used for train/test splitting."
+    )
+    parser.add_argument(
+        "--test-fraction",
+        default=0.2,
+        type=float,
+        help="Test fraction for train/test splitting.",
+    )
+    parser.add_argument(
+        "--centralised-eval",
+        action="store_true",
+        help="Conduct evaluation on centralised test set (True), or on hold-out data (False).",
+    )
+    parser.add_argument(
+        "--scaled-lr",
+        action="store_true",
+        help="Perform scaled learning rate based on the number of clients (True).",
+    )
+
+    args = parser.parse_args()
+    return args
+
+This defines various options for client data partitioning. Besides, clients also have
+an option to conduct evaluation on a centralised test set by setting
+``--centralised-eval``, as well as an option to scale the learning rate based on the
+number of clients by setting ``--scaled-lr``.
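+
+As an illustration, a client holding partition 0 out of 5 exponentially sized
+partitions could then be started as follows (the flag values are illustrative, assuming
+the example's ``client.py`` entry point):
+
+.. code-block:: shell
+
+    $ python3 client.py --node-id=0 --num-partitions=5 --partitioner-type=exponential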

 We also have an argument parser for simulation:

 .. code-block:: python

-    def sim_args_parser():
-        """Parse arguments to define experimental settings on server side."""
-        parser = argparse.ArgumentParser()
-
-        parser.add_argument(
-            "--train-method",
-            default="bagging",
-            type=str,
-            choices=["bagging", "cyclic"],
-            help="Training methods selected from bagging aggregation or cyclic training.",
-        )
-
-        # Server side
-        parser.add_argument(
-            "--pool-size", default=5, type=int, help="Number of total clients."
-        )
-        parser.add_argument(
-            "--num-rounds", default=30, type=int, help="Number of FL rounds."
-        )
-        parser.add_argument(
-            "--num-clients-per-round",
-            default=5,
-            type=int,
-            help="Number of clients participate in training each round.",
-        )
-        parser.add_argument(
-            "--num-evaluate-clients",
-            default=5,
-            type=int,
-            help="Number of clients selected for evaluation.",
-        )
-        parser.add_argument(
-            "--centralised-eval",
-            action="store_true",
-            help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
-        )
-        parser.add_argument(
-            "--num-cpus-per-client",
-            default=2,
-            type=int,
-            help="Number of CPUs used for per client.",
-        )
-
-        # Client side
-        parser.add_argument(
-            "--partitioner-type",
-            default="uniform",
-            type=str,
-            choices=["uniform", "linear", "square", "exponential"],
-            help="Partitioner types.",
-        )
-        parser.add_argument(
-            "--seed", default=42, type=int, help="Seed used for train/test splitting."
-        )
-        parser.add_argument(
-            "--test-fraction",
-            default=0.2,
-            type=float,
-            help="Test fraction for train/test splitting.",
-        )
-        parser.add_argument(
-            "--centralised-eval-client",
-            action="store_true",
-            help="Conduct evaluation on centralised test set (True), or on hold-out data (False).",
-        )
-        parser.add_argument(
-            "--scaled-lr",
-            action="store_true",
-            help="Perform scaled learning rate based on the number of clients (True).",
-        )
-
-        args = parser.parse_args()
-        return args
+    def sim_args_parser():
+        """Parse arguments to define experimental settings on server side."""
+        parser = argparse.ArgumentParser()
+
+        parser.add_argument(
+            "--train-method",
+            default="bagging",
+            type=str,
+            choices=["bagging", "cyclic"],
+            help="Training methods selected from bagging aggregation or cyclic training.",
+        )
+
+        # Server side
+        parser.add_argument(
+            "--pool-size", default=5, type=int, help="Number of total clients."
+        )
+        parser.add_argument(
+            "--num-rounds", default=30, type=int, help="Number of FL rounds."
+        )
+        parser.add_argument(
+            "--num-clients-per-round",
+            default=5,
+            type=int,
+            help="Number of clients participating in training each round.",
+        )
+        parser.add_argument(
+            "--num-evaluate-clients",
+            default=5,
+            type=int,
+            help="Number of clients selected for evaluation.",
+        )
+        parser.add_argument(
+            "--centralised-eval",
+            action="store_true",
+            help="Conduct centralised evaluation (True), or client evaluation on hold-out data (False).",
+        )
+        parser.add_argument(
+            "--num-cpus-per-client",
+            default=2,
+            type=int,
+            help="Number of CPUs used per client.",
+        )
+
+        # Client side
+        parser.add_argument(
+            "--partitioner-type",
+            default="uniform",
+            type=str,
+            choices=["uniform", "linear", "square", "exponential"],
+            help="Partitioner types.",
+        )
+        parser.add_argument(
+            "--seed", default=42, type=int, help="Seed used for train/test splitting."
+        )
+        parser.add_argument(
+            "--test-fraction",
+            default=0.2,
+            type=float,
+            help="Test fraction for train/test splitting.",
+        )
+        parser.add_argument(
+            "--centralised-eval-client",
+            action="store_true",
+            help="Conduct evaluation on centralised test set (True), or on hold-out data (False).",
+        )
+        parser.add_argument(
+            "--scaled-lr",
+            action="store_true",
+            help="Perform scaled learning rate based on the number of clients (True).",
+        )
+
+        args = parser.parse_args()
+        return args

 This integrates all arguments for both client and server sides.

 Example commands
-~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~

-To run a centralised evaluated experiment with bagging strategy on 5 clients with exponential distribution for 50 rounds,
-we first start the server as below:
+To run an experiment with centralised evaluation using the bagging strategy on 5
+clients with an exponential data distribution for 50 rounds, we first start the server
+as below:

 .. code-block:: shell

@@ -1303,4 +1338,6 @@ To run the same experiment with Flower simulation:

     $ python3 sim.py --train-method=bagging --pool-size=5 --num-rounds=50 --num-clients-per-round=5 --partitioner-type=exponential --centralised-eval

-The full `code `_ for this comprehensive example can be found in :code:`examples/xgboost-comprehensive`.
+The full `code
+`_ for this
+comprehensive example can be found in ``examples/xgboost-comprehensive``.
diff --git a/pyproject.toml b/pyproject.toml index f4555a6f1762..81c1369f6552 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,6 +136,15 @@ licensecheck = "==2024" pre-commit = "==3.5.0" sphinx-substitution-extensions = "2022.02.16" sphinxext-opengraph = "==0.9.1" +docstrfmt = { git = "https://github.com/charlesbvll/docstrfmt.git", branch = "patch-1" } + +[tool.docstrfmt] +extend_exclude = [ + "doc/source/conf.py", + "doc/source/tutorial-quickstart-huggingface.rst", + "doc/source/_templates/autosummary/*", + "doc/source/ref-api/*", +] [tool.isort] profile = "black"