diff --git a/.github/workflows/docker-server.yml b/.github/workflows/docker-superlink.yml similarity index 56% rename from .github/workflows/docker-server.yml rename to .github/workflows/docker-superlink.yml index f3717aa28dc6..b6a6b4114ba4 100644 --- a/.github/workflows/docker-server.yml +++ b/.github/workflows/docker-superlink.yml @@ -1,50 +1,45 @@ -name: Build docker server image +name: Build docker SuperLink image on: workflow_dispatch: inputs: flwr-version: - description: "Version of Flower" + description: "Version of Flower." required: true type: string - base-image-tag: - description: "The tag of the Flower base image." - required: false - type: string - default: "py3.11-ubuntu22.04" permissions: contents: read jobs: - build-server-images: + build-superlink-images: name: Build images uses: ./.github/workflows/_docker-build.yml # run only on default branch when using it with workflow_dispatch if: github.ref_name == github.event.repository.default_branch with: - namespace-repository: flwr/server - file-dir: src/docker/server + namespace-repository: flwr/superlink + file-dir: src/docker/superlink build-args: | FLWR_VERSION=${{ github.event.inputs.flwr-version }} - BASE_IMAGE_TAG=${{ github.event.inputs.base-image-tag }} + PYTHON_VERSION=3.11 + UBUNTU_VERSION=ubuntu22.04 tags: | - ${{ github.event.inputs.flwr-version }}-${{ github.event.inputs.base-image-tag }} + ${{ github.event.inputs.flwr-version }}-py3.11-ubuntu22.04 ${{ github.event.inputs.flwr-version }} - latest secrets: dockerhub-user: ${{ secrets.DOCKERHUB_USERNAME }} dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }} summary: - name: Build images + name: Summary runs-on: ubuntu-22.04 - needs: build-server-images + needs: build-superlink-images timeout-minutes: 10 steps: - run: | echo "### Images" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - for IMAGE in $(echo ${{ toJson(needs.build-server-images.outputs.metadata) }} | jq -r '.tags[]' ); do + for IMAGE in $(echo ${{ toJson(needs.build-superlink-images.outputs.metadata) }} | jq -r '.tags[]' ); do echo "- $IMAGE" >> $GITHUB_STEP_SUMMARY done diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml index 823ff1513790..f30f01373245 100644 --- a/.github/workflows/release-nightly.yml +++ b/.github/workflows/release-nightly.yml @@ -8,16 +8,56 @@ env: FLWR_TELEMETRY_ENABLED: 0 jobs: - release_nightly: + release-nightly: runs-on: ubuntu-22.04 - name: Nightly + name: Relase nightly on PyPI if: github.repository == 'adap/flower' + outputs: + name: ${{ steps.release.outputs.name }} + version: ${{ steps.release.outputs.version }} steps: - uses: actions/checkout@v4 - name: Bootstrap uses: ./.github/actions/bootstrap - name: Release nightly + id: release env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} run: | ./dev/publish-nightly.sh + echo "name=$(poetry version | awk {'print $1'})" >> $GITHUB_OUTPUT + echo "version=$(poetry version -s)" >> $GITHUB_OUTPUT + + wait-on-pypi: + runs-on: ubuntu-22.04 + timeout-minutes: 5 + name: Wait on PyPI + needs: release-nightly + steps: + - uses: actions/checkout@v4 + - name: Bootstrap + uses: ./.github/actions/bootstrap + - name: Wait until flwr package is avaiale on PyPI + run: until pip install ${{ needs.release-nightly.outputs.name }}==${{ needs.release-nightly.outputs.version }} --dry-run; do echo "Try again"; sleep 10; done + + build-docker-images: + name: Build nightly images + if: github.repository == 'adap/flower' + uses: ./.github/workflows/_docker-build.yml + needs: [release-nightly, wait-on-pypi] + strategy: + fail-fast: false + matrix: + images: [{ repository: "flwr/superlink", file-dir: "src/docker/superlink" }] + with: + namespace-repository: ${{ matrix.images.repository }} + file-dir: ${{ matrix.images.file-dir }} + build-args: | + FLWR_VERSION=${{ needs.release-nightly.outputs.version }} + FLWR_PACKAGE=${{ needs.release-nightly.outputs.name }} + tags: | + ${{ needs.release-nightly.outputs.version }} + nightly + secrets: + dockerhub-user: ${{ secrets.DOCKERHUB_USERNAME }} + dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/dev/publish-nightly.sh b/dev/publish-nightly.sh index f3e8d170f6c5..0c03cdda9f49 100755 --- a/dev/publish-nightly.sh +++ b/dev/publish-nightly.sh @@ -16,18 +16,18 @@ # ============================================================================== set -e -cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/../ +cd "$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"/../ # This script will build and publish a nightly release of Flower under the condition # that at least one commit was made in the last 24 hours. # It will rename the package name in the pyproject.toml to from "flwr" to "flwr-nightly". # The version name in the pyproject.toml will be appended with "-dev" and the current date. -# The result will be a release on PyPi of the package "flwr-nightly" of version e.g. +# The result will be a release on PyPi of the package "flwr-nightly" of version e.g. # "0.1.1.dev20200716" as seen at https://pypi.org/project/flwr-nightly/ if [[ $(git log --since="24 hours ago" --pretty=oneline) ]]; then sed -i -E "s/^name = \"(.+)\"/name = \"\1-nightly\"/" pyproject.toml - sed -i -E "s/^version = \"(.+)\"/version = \"\1-dev$(date '+%Y%m%d')\"/" pyproject.toml + sed -i -E "s/^version = \"(.+)\"/version = \"\1.dev$(date '+%Y%m%d')\"/" pyproject.toml python -m poetry build python -m poetry publish -u __token__ -p $PYPI_TOKEN else diff --git a/doc/source/contributor-how-to-build-docker-images.rst b/doc/source/contributor-how-to-build-docker-images.rst index 5dead265bee2..bac201f6a7b9 100644 --- a/doc/source/contributor-how-to-build-docker-images.rst +++ b/doc/source/contributor-how-to-build-docker-images.rst @@ -1,8 +1,8 @@ How to build Docker Flower images locally ========================================= -Flower provides pre-made docker images on `Docker Hub `_ -that include all necessary dependencies for running the server. You can also build your own custom +Flower provides pre-made docker images on `Docker Hub `_ +that include all necessary dependencies for running the SuperLink. You can also build your own custom docker images from scratch with a different version of Python or Ubuntu if that is what you need. In this guide, we will explain what images exist and how to build them locally. @@ -20,15 +20,15 @@ Before we can start, we need to meet a few prerequisites in our local developmen :doc:`Run Flower using Docker ` which covers this step in more detail. -Currently, Flower provides two images, a base image and a server image. There will also be a client -image soon. The base image, as the name suggests, contains basic dependencies that both the server -and the client need. This includes system dependencies, Python and Python tools. The server image is -based on the base image, but it additionally installs the Flower server using ``pip``. +Currently, Flower provides two images, a ``base`` image and a ``superlink`` image. The base image, +as the name suggests, contains basic dependencies that the SuperLink needs. +This includes system dependencies, Python and Python tools. The SuperLink image is +based on the base image, but it additionally installs the SuperLink using ``pip``. The build instructions that assemble the images are located in the respective Dockerfiles. You can find them in the subdirectories of ``src/docker``. -Both, base and server image are configured via build arguments. Through build arguments, we can make +Both, base and SuperLink image are configured via build arguments. Through build arguments, we can make our build more flexible. For example, in the base image, we can specify the version of Python to install using the ``PYTHON_VERSION`` build argument. Some of the build arguments have default values, others must be specified when building the image. All available build arguments for each @@ -76,8 +76,8 @@ The following example creates a base image with Python 3.11.0, pip 23.0.1 and se The name of image is ``flwr_base`` and the tag ``0.1.0``. Remember that the build arguments as well as the name and tag can be adapted to your needs. These values serve as examples only. -Building the server image -------------------------- +Building the SuperLink image +---------------------------- .. list-table:: :widths: 25 45 15 15 @@ -89,47 +89,53 @@ Building the server image - Example * - ``BASE_REPOSITORY`` - The repository name of the base image. - - Defaults to ``flwr/server``. + - Defaults to ``flwr/base``. - - * - ``BASE_IMAGE_TAG`` - - The image tag of the base image. - - Defaults to ``py3.11-ubuntu22.04``. + * - ``PYTHON_VERSION`` + - The Python version of the base image. + - Defaults to ``py3.11``. + - + * - ``UBUNTU_VERSION`` + - The Ubuntu version of the base image. + - Defaults to ``ubuntu22.04``. + - + * - ``FLWR_PACKAGE`` + - The PyPI package to install. + - Defaults to ``flwr``. - * - ``FLWR_VERSION`` - Version of Flower to be installed. - Yes - - ``1.7.0`` + - ``1.8.0`` -The following example creates a server image with the official Flower base image py3.11-ubuntu22.04 -and Flower 1.7.0: + +The following example creates a SuperLink image with the official Flower base image +py3.11-ubuntu22.04 and Flower 1.8.0: .. code-block:: bash - $ cd src/docker/server/ + $ cd src/docker/superlink/ $ docker build \ - --build-arg BASE_IMAGE_TAG=py3.11-ubuntu22.04 \ - --build-arg FLWR_VERSION=1.7.0 \ - -t flwr_server:0.1.0 . + --build-arg FLWR_VERSION=1.8.0 \ + -t flwr_superlink:0.1.0 . -The name of image is ``flwr_server`` and the tag ``0.1.0``. Remember that the build arguments as well -as the name and tag can be adapted to your needs. These values serve as examples only. +The name of image is ``flwr_superlink`` and the tag ``0.1.0``. Remember that the build arguments as +well as the name and tag can be adapted to your needs. These values serve as examples only. If you want to use your own base image instead of the official Flower base image, all you need to do -is set the ``BASE_REPOSITORY`` and ``BASE_IMAGE_TAG`` build arguments. The value of -``BASE_REPOSITORY`` must match the name of your image and the value of ``BASE_IMAGE_TAG`` must match -the tag of your image. - +is set the ``BASE_REPOSITORY``, ``PYTHON_VERSION`` and ``UBUNTU_VERSION`` build arguments. .. code-block:: bash - $ cd src/docker/server/ + $ cd src/docker/superlink/ $ docker build \ --build-arg BASE_REPOSITORY=flwr_base \ - --build-arg BASE_IMAGE_TAG=0.1.0 \ - --build-arg FLWR_VERSION=1.7.0 \ - -t flwr_server:0.1.0 . + --build-arg PYTHON_VERSION=3.11 \ + --build-arg UBUNTU_VERSION=ubuntu22.04 \ + --build-arg FLWR_VERSION=1.8.0 \ + -t flwr_superlink:0.1.0 . After creating the image, we can test whether the image is working: .. code-block:: bash - $ docker run --rm flwr_server:0.1.0 --help + $ docker run --rm flwr_superlink:0.1.0 --help diff --git a/doc/source/how-to-run-flower-using-docker.rst b/doc/source/how-to-run-flower-using-docker.rst index ed034c820142..25262109f247 100644 --- a/doc/source/how-to-run-flower-using-docker.rst +++ b/doc/source/how-to-run-flower-using-docker.rst @@ -2,14 +2,14 @@ Run Flower using Docker ======================= The simplest way to get started with Flower is by using the pre-made Docker images, which you can -find on `Docker Hub `_. +find on `Docker Hub `_. Before you start, make sure that the Docker daemon is running: .. code-block:: bash $ docker -v - Docker version 24.0.7, build afdd53b + Docker version 26.0.0, build 2ae903e If you do not see the version of Docker but instead get an error saying that the command was not found, you will need to install Docker first. You can find installation instruction @@ -21,8 +21,8 @@ was not found, you will need to install Docker first. You can find installation you can follow the `Post-installation steps `_ on the official Docker website. -Flower server -------------- +Flower SuperLink +---------------- Quickstart ~~~~~~~~~~ @@ -31,43 +31,41 @@ If you're looking to try out Flower, you can use the following command: .. code-block:: bash - $ docker run --rm -p 9091:9091 -p 9092:9092 flwr/server:1.7.0-py3.11-ubuntu22.04 \ - --insecure + $ docker run --rm -p 9091:9091 -p 9092:9092 flwr/superlink:1.8.0 --insecure -The command will pull the Docker image with the tag ``1.7.0-py3.11-ubuntu22.04`` from Docker Hub. -The tag contains the information which Flower, Python and Ubuntu is used. In this case, it -uses Flower 1.7.0, Python 3.11 and Ubuntu 22.04. The ``--rm`` flag tells Docker to remove -the container after it exits. +The command pulls the Docker image with the tag ``1.8.0`` from Docker Hub. The tag specifies +the Flower version. In this case, Flower 1.8.0. The ``--rm`` flag tells Docker to remove the +container after it exits. .. note:: - By default, the Flower server keeps state in-memory. When using the Docker flag - ``--rm``, the state is not persisted between container starts. We will show below how to save the - state in a file on your host system. + By default, the Flower SuperLink keeps state in-memory. When using the Docker flag ``--rm``, the + state is not persisted between container starts. We will show below how to save the state in a + file on your host system. The ``-p :`` flag tells Docker to map the ports ``9091``/``9092`` of the host to ``9091``/``9092`` of the container, allowing you to access the Driver API on ``http://localhost:9091`` and the Fleet API on ``http://localhost:9092``. Lastly, any flag that comes after the tag is passed -to the Flower server. Here, we are passing the flag ``--insecure``. +to the Flower SuperLink. Here, we are passing the flag ``--insecure``. .. attention:: - The ``--insecure`` flag enables insecure communication (using HTTP, not HTTPS) and should only be used - for testing purposes. We strongly recommend enabling + The ``--insecure`` flag enables insecure communication (using HTTP, not HTTPS) and should only be + used for testing purposes. We strongly recommend enabling `SSL `_ when deploying to a production environment. -You can use ``--help`` to view all available flags that the server supports: +You can use ``--help`` to view all available flags that the SuperLink supports: .. code-block:: bash - $ docker run --rm flwr/server:1.7.0-py3.11-ubuntu22.04 --help + $ docker run --rm flwr/superlink:1.8.0 --help Mounting a volume to store the state on the host system ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you want to persist the state of the server on your host system, all you need to do is specify a -path where you want to save the file on your host system and a name for the database file. In the +If you want to persist the state of the SuperLink on your host system, all you need to do is specify +a path where you want to save the file on your host system and a name for the database file. In the example below, we tell Docker via the flag ``-v`` to mount the user's home directory (``~/`` on your host) into the ``/app/`` directory of the container. Furthermore, we use the flag ``--database`` to specify the name of the database file. @@ -75,18 +73,19 @@ flag ``--database`` to specify the name of the database file. .. code-block:: bash $ docker run --rm \ - -p 9091:9091 -p 9092:9092 -v ~/:/app/ flwr/server:1.7.0-py3.11-ubuntu22.04 \ + -p 9091:9091 -p 9092:9092 -v ~/:/app/ flwr/superlink:1.8.0 \ --insecure \ --database state.db -As soon as the server starts, the file ``state.db`` is created in the user's home directory on -your host system. If the file already exists, the server tries to restore the state from the file. -To start the server with an empty database, simply remove the ``state.db`` file. +As soon as the SuperLink starts, the file ``state.db`` is created in the user's home directory on +your host system. If the file already exists, the SuperLink tries to restore the state from the +file. To start the SuperLink with an empty database, simply remove the ``state.db`` file. Enabling SSL for secure connections ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -To enable SSL, you will need a CA certificate, a server certificate and a server private key. +To enable SSL, you will need a PEM-encoded root certificate, a PEM-encoded private key and a +PEM-encoded certificate chain. .. note:: For testing purposes, you can generate your own self-signed certificates. The @@ -95,20 +94,21 @@ To enable SSL, you will need a CA certificate, a server certificate and a server Assuming all files we need are in the local ``certificates`` directory, we can use the flag ``-v`` to mount the local directory into the ``/app/`` directory of the container. This allows the -server to access the files within the container. Finally, we pass the names of the certificates to -the server with the ``--certificates`` flag. +SuperLink to access the files within the container. Finally, we pass the names of the certificates +to the SuperLink with the ``--certificates`` flag. .. code-block:: bash $ docker run --rm \ - -p 9091:9091 -p 9092:9092 -v ./certificates/:/app/ flwr/server:1.7.0-py3.11-ubuntu22.04 \ + -p 9091:9091 -p 9092:9092 -v ./certificates/:/app/ flwr/superlink:1.8.0 \ --certificates ca.crt server.pem server.key -Using a different Flower or Python version -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Using a different Flower version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you want to use a different version of Flower or Python, you can do so by changing the tag. -All versions we provide are available on `Docker Hub `_. +If you want to use a different version of Flower, for example Flower nightly, you can do so by +changing the tag. All available versions are on +`Docker Hub `_. Pinning a Docker image to a specific version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -118,19 +118,19 @@ updates of system dependencies that should not change the functionality of Flowe want to ensure that you always use the same image, you can specify the hash of the image instead of the tag. -The following command returns the current image hash referenced by the ``server:1.7.0-py3.11-ubuntu22.04`` tag: +The following command returns the current image hash referenced by the ``superlink:1.8.0`` tag: .. code-block:: bash - $ docker inspect --format='{{index .RepoDigests 0}}' flwr/server:1.7.0-py3.11-ubuntu22.04 - flwr/server@sha256:c4be5012f9d73e3022e98735a889a463bb2f4f434448ebc19c61379920b1b327 + $ docker inspect --format='{{index .RepoDigests 0}}' flwr/superlink:1.8.0 + flwr/superlink@sha256:1b855d1fa4e344e4d95db99793f2bb35d8c63f6a1decdd736863bfe4bb0fe46c -Next, we can pin the hash when running a new server container: +Next, we can pin the hash when running a new SuperLink container: .. code-block:: bash $ docker run \ - --rm flwr/server@sha256:c4be5012f9d73e3022e98735a889a463bb2f4f434448ebc19c61379920b1b327 \ + --rm flwr/superlink@sha256:1b855d1fa4e344e4d95db99793f2bb35d8c63f6a1decdd736863bfe4bb0fe46c \ --insecure Setting environment variables @@ -141,4 +141,4 @@ To set a variable inside a Docker container, you can use the ``-e = .. code-block:: bash $ docker run -e FLWR_TELEMETRY_ENABLED=0 \ - --rm flwr/server:1.7.0-py3.11-ubuntu22.04 --insecure + --rm flwr/superlink:1.8.0 --insecure diff --git a/doc/source/how-to-upgrade-to-flower-next.rst b/doc/source/how-to-upgrade-to-flower-next.rst new file mode 100644 index 000000000000..0df0152caead --- /dev/null +++ b/doc/source/how-to-upgrade-to-flower-next.rst @@ -0,0 +1,333 @@ +Upgrade to Flower Next +====================== + +Welcome to the migration guide for updating Flower to Flower Next! Whether you're a seasoned user +or just getting started, this guide will help you smoothly transition your existing setup to take +advantage of the latest features and improvements in Flower Next, starting from version 1.8. + +.. note:: + This guide shows how to reuse pre-``1.8`` Flower code with minimum code changes by + using the *compatibility layer* in Flower Next. In another guide, we will show how + to run Flower Next end-to-end with pure Flower Next APIs. + +Let's dive in! + +.. + Generate link text as literal. Refs: + - https://stackoverflow.com/q/71651598 + - https://github.com/jgm/pandoc/issues/3973#issuecomment-337087394 + +.. |clientapp_link| replace:: ``ClientApp()`` +.. |serverapp_link| replace:: ``ServerApp()`` +.. |startclient_link| replace:: ``start_client()`` +.. |startserver_link| replace:: ``start_server()`` +.. |startsim_link| replace:: ``start_simulation()`` +.. |runsimcli_link| replace:: ``flower-simulation`` +.. |runsim_link| replace:: ``run_simulation()`` +.. |flowernext_superlink_link| replace:: ``flower-superlink`` +.. |flowernext_clientapp_link| replace:: ``flower-client-app`` +.. |flowernext_serverapp_link| replace:: ``flower-server-app`` +.. _clientapp_link: ref-api/flwr.client.ClientApp.html +.. _serverapp_link: ref-api/flwr.server.ServerApp.html +.. _startclient_link: ref-api/flwr.client.start_client.html +.. _startserver_link: ref-api/flwr.server.start_server.html +.. _startsim_link: ref-api/flwr.simulation.start_simulation.html +.. _runsimcli_link: ref-api/flwr.simulation.run_simulation_from_cli.html +.. _runsim_link: ref-api/flwr.simulation.run_simulation.html +.. _flowernext_superlink_link: ref-api-cli.html#flower-superlink +.. _flowernext_clientapp_link: ref-api-cli.html#flower-client-app +.. _flowernext_serverapp_link: ref-api-cli.html#flower-server-app + +Install update +-------------- + +Using pip +~~~~~~~~~ + +Here's how to update an existing installation of Flower to Flower Next with ``pip``: + +.. code-block:: bash + + $ python -m pip install -U flwr + +or if you need Flower Next with simulation: + +.. code-block:: bash + + $ python -m pip install -U flwr[simulation] + + +Ensure you set the following version constraint in your ``requirements.txt`` + +.. code-block:: + + # Without simulation support + flwr>=1.8,<2.0 + + # With simulation support + flwr[simulation]>=1.8, <2.0 + +or ``pyproject.toml``: + +.. code-block:: toml + + # Without simulation support + dependencies = ["flwr>=1.8,2.0"] + + # With simulation support + dependencies = ["flwr[simulation]>=1.8,2.0"] + +Using Poetry +~~~~~~~~~~~~ + +Update the ``flwr`` dependency in ``pyproject.toml`` and then reinstall (don't forget to delete ``poetry.lock`` via ``rm poetry.lock`` before running ``poetry install``). + +Ensure you set the following version constraint in your ``pyproject.toml``: + +.. code-block:: toml + + [tool.poetry.dependencies] + python = "^3.8" + + # Without simulation support + flwr = ">=1.8,<2.0" + + # With simulation support + flwr = { version = ">=1.8,<2.0", extras = ["simulation"] } + +Required changes +---------------- + +In Flower Next, the *infrastructure* and *application layers* have been decoupled. +Instead of starting a client in code via ``start_client()``, you create a |clientapp_link|_ and start it via the command line. +Instead of starting a server in code via ``start_server()``, you create a |serverapp_link|_ and start it via the command line. +The long-running components of server and client are called SuperLink and SuperNode. +The following non-breaking changes that require manual updates and allow you to run your project both in the traditional way and in the Flower Next way: + +|clientapp_link|_ +~~~~~~~~~~~~~~~~~ +- Wrap your existing client with |clientapp_link|_ instead of launching it via + |startclient_link|_. Here's an example: + +.. code-block:: python + :emphasize-lines: 5,11 + + # Flower 1.8 + def client_fn(cid: str): + return flwr.client.FlowerClient().to_client() + + app = flwr.client.ClientApp( + client_fn=client_fn, + ) + + # Flower 1.7 + if __name__ == "__main__": + flwr.client.start_client( + server_address="127.0.0.1:8080", + client=flwr.client.FlowerClient().to_client(), + ) + +|serverapp_link|_ +~~~~~~~~~~~~~~~~~ +- Wrap your existing strategy with |serverapp_link|_ instead of starting the server + via |startserver_link|_. Here's an example: + +.. code-block:: python + :emphasize-lines: 2,9 + + # Flower 1.8 + app = flwr.server.ServerApp( + config=config, + strategy=strategy, + ) + + # Flower 1.7 + if __name__ == "__main__": + flwr.server.start_server( + server_address="0.0.0.0:8080", + config=config, + strategy=strategy, + ) + +Deployment +~~~~~~~~~~ +- Run the ``SuperLink`` using |flowernext_superlink_link|_ before running, in sequence, + |flowernext_clientapp_link|_ (2x) and |flowernext_serverapp_link|_. There is no need to + execute `client.py` and `server.py` as Python scripts. +- Here's an example to start the server without HTTPS (only for prototyping): + +.. code-block:: bash + + # Start a Superlink + $ flower-superlink --insecure + + # In a new terminal window, start a long-running SuperNode + $ flower-client-app client:app --insecure + + # In another terminal window, start another long-running SuperNode (at least 2 SuperNodes are required) + $ flower-client-app client:app --insecure + + # In yet another terminal window, run the ServerApp (this starts the actual training run) + $ flower-server-app server:app --insecure + +- Here's another example to start with HTTPS. Use the ``--certificates`` command line + argument to pass paths to (CA certificate, server certificate, and server private key). + +.. code-block:: bash + + # Start a secure Superlink + $ flower-superlink --certificates \ + \ + \ + + + # In a new terminal window, start a long-running secure SuperNode + $ flower-client-app client:app \ + --root-certificates \ + --server 127.0.0.1:9092 + + # In another terminal window, start another long-running secure SuperNode (at least 2 SuperNodes are required) + $ flower-client-app client:app \ + --root-certificates \ + --server 127.0.0.1:9092 + + # In yet another terminal window, run the ServerApp (this starts the actual training run) + $ flower-server-app server:app \ + --root-certificates \ + --server 127.0.0.1:9091 + +Simulation in CLI +~~~~~~~~~~~~~~~~~ +- Wrap your existing client and strategy with |clientapp_link|_ and |serverapp_link|_, + respectively. There is no need to use |startsim_link|_ anymore. Here's an example: + +.. code-block:: python + :emphasize-lines: 9,13,20 + + # Regular Flower client implementation + class FlowerClient(NumPyClient): + # ... + + # Flower 1.8 + def client_fn(cid: str): + return FlowerClient().to_client() + + client_app = flwr.client.ClientApp( + client_fn=client_fn, + ) + + server_app = flwr.server.ServerApp( + config=config, + strategy=strategy, + ) + + # Flower 1.7 + if __name__ == "__main__": + hist = flwr.simulation.start_simulation( + num_clients=100, + ... + ) + +- Run |runsimcli_link|_ in CLI and point to the ``server_app`` / ``client_app`` object in the + code instead of executing the Python script. Here's an example (assuming the + ``server_app`` and ``client_app`` objects are in a ``sim.py`` module): + +.. code-block:: bash + + # Flower 1.8 + $ flower-simulation \ + --server-app=sim:server_app \ + --client-app=sim:client_app \ + --num-supernodes=100 + +.. code-block:: python + + # Flower 1.7 + $ python sim.py + +- Set default resources for each |clientapp_link|_ using the ``--backend-config`` command + line argument instead of setting the ``client_resources`` argument in + |startsim_link|_. Here's an example: + +.. code-block:: bash + :emphasize-lines: 6 + + # Flower 1.8 + $ flower-simulation \ + --client-app=sim:client_app \ + --server-app=sim:server_app \ + --num-supernodes=100 \ + --backend-config='{"client_resources": {"num_cpus": 2, "num_gpus": 0.25}}' + +.. code-block:: python + :emphasize-lines: 5 + + # Flower 1.7 (in `sim.py`) + if __name__ == "__main__": + hist = flwr.simulation.start_simulation( + num_clients=100, + client_resources = {'num_cpus': 2, "num_gpus": 0.25}, + ... + ) + +Simulation in a Notebook +~~~~~~~~~~~~~~~~~~~~~~~~ +- Run |runsim_link|_ in your notebook instead of |startsim_link|_. Here's an example: + +.. code-block:: python + :emphasize-lines: 19,27 + + NUM_CLIENTS = + + def client_fn(cid: str): + # ... + return FlowerClient().to_client() + + client_app = flwr.client.ClientApp( + client_fn=client_fn, + ) + + server_app = flwr.server.ServerApp( + config=config, + strategy=strategy, + ) + + backend_config = {"client_resources": {"num_cpus": 2, "num_gpus": 0.25}} + + # Flower 1.8 + flwr.simulation.run_simulation( + server_app=server_app, + client_app=client_app, + num_supernodes=NUM_CLIENTS, + backend_config=backend_config, + ) + + # Flower 1.7 + flwr.simulation.start_simulation( + client_fn=client_fn, + num_clients=NUM_CLIENTS, + config=config, + strategy=strategy, + client_resources=backend_config["client_resources"], + ) + + +Further help +------------ + +Some official `Flower code examples `_ are already +updated to Flower Next so they can serve as a reference for using the Flower Next API. If there are +further questions, `join the Flower Slack `_ and use the channel ``#questions``. +You can also `participate in Flower Discuss `_ where you can find us +answering questions, or share and learn from others about migrating to Flower Next. + +.. admonition:: Important + :class: important + + As we continuously enhance Flower Next at a rapid pace, we'll be periodically + updating this guide. Please feel free to share any feedback with us! + +.. + [TODO] Add links to Flower Next 101 and Flower Glossary + +Happy migrating! 🚀 diff --git a/doc/source/index.rst b/doc/source/index.rst index 59a7a5e565ca..d01dbfa6b965 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -89,10 +89,11 @@ Problem-oriented how-to guides show step-by-step how to achieve a specific goal. how-to-monitor-simulation how-to-configure-logging how-to-enable-ssl-connections - how-to-upgrade-to-flower-1.0 how-to-use-built-in-mods - how-to-run-flower-using-docker how-to-use-differential-privacy + how-to-run-flower-using-docker + how-to-upgrade-to-flower-1.0 + how-to-upgrade-to-flower-next .. toctree:: :maxdepth: 1 diff --git a/examples/llm-flowertune/requirements.txt b/examples/llm-flowertune/requirements.txt index aac315b4101a..7c66612eb2a5 100644 --- a/examples/llm-flowertune/requirements.txt +++ b/examples/llm-flowertune/requirements.txt @@ -1,5 +1,5 @@ flwr[rest,simulation]>=1.8.0, <2.0 -flwr_datasets==0.0.2 +flwr-datasets>=0.0.2 hydra-core==1.3.2 trl==0.7.2 bitsandbytes==0.41.3 diff --git a/examples/quickstart-cpp/README.md b/examples/quickstart-cpp/README.md index d8982048793c..d6cbeebe1bc6 100644 --- a/examples/quickstart-cpp/README.md +++ b/examples/quickstart-cpp/README.md @@ -1,4 +1,4 @@ -# Flower Clients in C++ +# Flower Clients in C++ (under development) In this example you will train a linear model on synthetic data using C++ clients. @@ -12,7 +12,7 @@ Many thanks to the original contributors to this code: ## Install requirements -You'll need CMake and Python. +You'll need CMake and Python with `flwr` installed. ### Building the example @@ -23,16 +23,20 @@ cmake -S . -B build cmake --build build ``` -## Run the server and two clients in separate terminals +## Run the `Flower SuperLink`, the two clients, and the `Flower ServerApp` in separate terminals ```bash -python server.py +flwr-superlink --insecure ``` ```bash -build/flwr_client 0 127.0.0.1:8080 +build/flwr_client 0 127.0.0.1:9092 ``` ```bash -build/flwr_client 1 127.0.0.1:8080 +build/flwr_client 1 127.0.0.1:9092 +``` + +```bash +flower-server-app server:app --insecure ``` diff --git a/examples/quickstart-cpp/driver.py b/examples/quickstart-cpp/driver.py deleted file mode 100644 index f19cf0e9bd98..000000000000 --- a/examples/quickstart-cpp/driver.py +++ /dev/null @@ -1,10 +0,0 @@ -import flwr as fl -from fedavg_cpp import FedAvgCpp - -# Start Flower server for three rounds of federated learning -if __name__ == "__main__": - fl.server.start_driver( - server_address="0.0.0.0:9091", - config=fl.server.ServerConfig(num_rounds=3), - strategy=FedAvgCpp(), - ) diff --git a/examples/quickstart-cpp/server.py b/examples/quickstart-cpp/server.py index 99d549a4bb45..8ad8e0b3647c 100644 --- a/examples/quickstart-cpp/server.py +++ b/examples/quickstart-cpp/server.py @@ -14,11 +14,3 @@ config=fl.server.ServerConfig(num_rounds=3), strategy=strategy, ) - -# Start Flower server for three rounds of federated learning -if __name__ == "__main__": - fl.server.start_server( - server_address="0.0.0.0:8080", - config=fl.server.ServerConfig(num_rounds=3), - strategy=strategy, - ) diff --git a/pyproject.toml b/pyproject.toml index f2af2aa0815d..d66f8848c68e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ flwr = "flwr.cli.app:app" flower-driver-api = "flwr.server:run_driver_api" flower-fleet-api = "flwr.server:run_fleet_api" flower-superlink = "flwr.server:run_superlink" +flower-supernode = "flwr.client:run_supernode" flower-client-app = "flwr.client:run_client_app" flower-server-app = "flwr.server:run_server_app" flower-simulation = "flwr.simulation:run_simulation_from_cli" diff --git a/src/py/flwr/cli/flower_toml.py b/src/py/flwr/cli/config_utils.py similarity index 91% rename from src/py/flwr/cli/flower_toml.py rename to src/py/flwr/cli/config_utils.py index 103f83532054..a6bf1d5a6b3a 100644 --- a/src/py/flwr/cli/flower_toml.py +++ b/src/py/flwr/cli/config_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utility to validate the `flower.toml` file.""" +"""Utility to validate the `pyproject.toml` file.""" import os from typing import Any, Dict, List, Optional, Tuple @@ -25,7 +25,7 @@ def load_and_validate_with_defaults( path: Optional[str] = None, ) -> Tuple[Optional[Dict[str, Any]], List[str], List[str]]: - """Load and validate flower.toml as dict. + """Load and validate pyproject.toml as dict. Returns ------- @@ -37,7 +37,7 @@ def load_and_validate_with_defaults( if config is None: errors = [ - "Project configuration could not be loaded. flower.toml does not exist." + "Project configuration could not be loaded. pyproject.toml does not exist." ] return (None, errors, []) @@ -58,10 +58,10 @@ def load_and_validate_with_defaults( def load(path: Optional[str] = None) -> Optional[Dict[str, Any]]: - """Load flower.toml and return as dict.""" + """Load pyproject.toml and return as dict.""" if path is None: cur_dir = os.getcwd() - toml_path = os.path.join(cur_dir, "flower.toml") + toml_path = os.path.join(cur_dir, "pyproject.toml") else: toml_path = path @@ -74,7 +74,7 @@ def load(path: Optional[str] = None) -> Optional[Dict[str, Any]]: def validate_fields(config: Dict[str, Any]) -> Tuple[bool, List[str], List[str]]: - """Validate flower.toml fields.""" + """Validate pyproject.toml fields.""" errors = [] warnings = [] @@ -106,7 +106,7 @@ def validate_fields(config: Dict[str, Any]) -> Tuple[bool, List[str], List[str]] def validate(config: Dict[str, Any]) -> Tuple[bool, List[str], List[str]]: - """Validate flower.toml.""" + """Validate pyproject.toml.""" is_valid, errors, warnings = validate_fields(config) if not is_valid: diff --git a/src/py/flwr/cli/flower_toml_test.py b/src/py/flwr/cli/config_utils_test.py similarity index 65% rename from src/py/flwr/cli/flower_toml_test.py rename to src/py/flwr/cli/config_utils_test.py index 72a52e4e8b9b..4d949c0042a4 100644 --- a/src/py/flwr/cli/flower_toml_test.py +++ b/src/py/flwr/cli/config_utils_test.py @@ -18,15 +18,29 @@ import textwrap from typing import Any, Dict -from .flower_toml import load, validate, validate_fields +from .config_utils import load, validate, validate_fields -def test_load_flower_toml_load_from_cwd(tmp_path: str) -> None: +def test_load_pyproject_toml_load_from_cwd(tmp_path: str) -> None: """Test if load_template returns a string.""" # Prepare - flower_toml_content = """ + pyproject_toml_content = """ + [build-system] + requires = ["hatchling"] + build-backend = "hatchling.build" + [project] name = "fedgpt" + version = "1.0.0" + description = "" + authors = [ + { name = "The Flower Authors", email = "hello@flower.ai" }, + ] + license = {text = "Apache License (2.0)"} + dependencies = [ + "flwr[simulation]>=1.8.0,<2.0", + "numpy>=1.21.0", + ] [flower.components] serverapp = "fedgpt.server:app" @@ -39,8 +53,14 @@ def test_load_flower_toml_load_from_cwd(tmp_path: str) -> None: count = 10 # optional """ expected_config = { + "build-system": {"build-backend": "hatchling.build", "requires": ["hatchling"]}, "project": { "name": "fedgpt", + "version": "1.0.0", + "description": "", + "authors": [{"email": "hello@flower.ai", "name": "The Flower Authors"}], + "license": {"text": "Apache License (2.0)"}, + "dependencies": ["flwr[simulation]>=1.8.0,<2.0", "numpy>=1.21.0"], }, "flower": { "components": { @@ -60,8 +80,8 @@ def test_load_flower_toml_load_from_cwd(tmp_path: str) -> None: try: # Change into the temporary directory os.chdir(tmp_path) - with open("flower.toml", "w", encoding="utf-8") as f: - f.write(textwrap.dedent(flower_toml_content)) + with open("pyproject.toml", "w", encoding="utf-8") as f: + f.write(textwrap.dedent(pyproject_toml_content)) # Execute config = load() @@ -72,12 +92,26 @@ def test_load_flower_toml_load_from_cwd(tmp_path: str) -> None: os.chdir(origin) -def test_load_flower_toml_from_path(tmp_path: str) -> None: +def test_load_pyproject_toml_from_path(tmp_path: str) -> None: """Test if load_template returns a string.""" # Prepare - flower_toml_content = """ + pyproject_toml_content = """ + [build-system] + requires = ["hatchling"] + build-backend = "hatchling.build" + [project] name = "fedgpt" + version = "1.0.0" + description = "" + authors = [ + { name = "The Flower Authors", email = "hello@flower.ai" }, + ] + license = {text = "Apache License (2.0)"} + dependencies = [ + "flwr[simulation]>=1.8.0,<2.0", + "numpy>=1.21.0", + ] [flower.components] serverapp = "fedgpt.server:app" @@ -90,8 +124,14 @@ def test_load_flower_toml_from_path(tmp_path: str) -> None: count = 10 # optional """ expected_config = { + "build-system": {"build-backend": "hatchling.build", "requires": ["hatchling"]}, "project": { "name": "fedgpt", + "version": "1.0.0", + "description": "", + "authors": [{"email": "hello@flower.ai", "name": "The Flower Authors"}], + "license": {"text": "Apache License (2.0)"}, + "dependencies": ["flwr[simulation]>=1.8.0,<2.0", "numpy>=1.21.0"], }, "flower": { "components": { @@ -111,11 +151,11 @@ def test_load_flower_toml_from_path(tmp_path: str) -> None: try: # Change into the temporary directory os.chdir(tmp_path) - with open("flower.toml", "w", encoding="utf-8") as f: - f.write(textwrap.dedent(flower_toml_content)) + with open("pyproject.toml", "w", encoding="utf-8") as f: + f.write(textwrap.dedent(pyproject_toml_content)) # Execute - config = load(path=os.path.join(tmp_path, "flower.toml")) + config = load(path=os.path.join(tmp_path, "pyproject.toml")) # Assert assert config == expected_config @@ -123,8 +163,8 @@ def test_load_flower_toml_from_path(tmp_path: str) -> None: os.chdir(origin) -def test_validate_flower_toml_fields_empty() -> None: - """Test that validate_flower_toml_fields fails correctly.""" +def test_validate_pyproject_toml_fields_empty() -> None: + """Test that validate_pyproject_toml_fields fails correctly.""" # Prepare config: Dict[str, Any] = {} @@ -137,8 +177,8 @@ def test_validate_flower_toml_fields_empty() -> None: assert len(warnings) == 0 -def test_validate_flower_toml_fields_no_flower() -> None: - """Test that validate_flower_toml_fields fails correctly.""" +def test_validate_pyproject_toml_fields_no_flower() -> None: + """Test that validate_pyproject_toml_fields fails correctly.""" # Prepare config = { "project": { @@ -159,8 +199,8 @@ def test_validate_flower_toml_fields_no_flower() -> None: assert len(warnings) == 0 -def test_validate_flower_toml_fields_no_flower_components() -> None: - """Test that validate_flower_toml_fields fails correctly.""" +def test_validate_pyproject_toml_fields_no_flower_components() -> None: + """Test that validate_pyproject_toml_fields fails correctly.""" # Prepare config = { "project": { @@ -182,8 +222,8 @@ def test_validate_flower_toml_fields_no_flower_components() -> None: assert len(warnings) == 0 -def test_validate_flower_toml_fields_no_server_and_client_app() -> None: - """Test that validate_flower_toml_fields fails correctly.""" +def test_validate_pyproject_toml_fields_no_server_and_client_app() -> None: + """Test that validate_pyproject_toml_fields fails correctly.""" # Prepare config = { "project": { @@ -205,8 +245,8 @@ def test_validate_flower_toml_fields_no_server_and_client_app() -> None: assert len(warnings) == 0 -def test_validate_flower_toml_fields() -> None: - """Test that validate_flower_toml_fields succeeds correctly.""" +def test_validate_pyproject_toml_fields() -> None: + """Test that validate_pyproject_toml_fields succeeds correctly.""" # Prepare config = { "project": { @@ -228,8 +268,8 @@ def test_validate_flower_toml_fields() -> None: assert len(warnings) == 0 -def test_validate_flower_toml() -> None: - """Test that validate_flower_toml succeeds correctly.""" +def test_validate_pyproject_toml() -> None: + """Test that validate_pyproject_toml succeeds correctly.""" # Prepare config = { "project": { @@ -256,8 +296,8 @@ def test_validate_flower_toml() -> None: assert not warnings -def test_validate_flower_toml_fail() -> None: - """Test that validate_flower_toml fails correctly.""" +def test_validate_pyproject_toml_fail() -> None: + """Test that validate_pyproject_toml fails correctly.""" # Prepare config = { "project": { diff --git a/src/py/flwr/cli/new/new.py b/src/py/flwr/cli/new/new.py index 0c429ce34cf2..201e145c194e 100644 --- a/src/py/flwr/cli/new/new.py +++ b/src/py/flwr/cli/new/new.py @@ -58,8 +58,9 @@ def render_template(template: str, data: Dict[str, str]) -> str: """Render template.""" tpl_file = load_template(template) tpl = Template(tpl_file) - result = tpl.substitute(data) - return result + if ".gitignore" not in template: + return tpl.substitute(data) + return tpl.template def create_file(file_path: str, content: str) -> None: @@ -127,16 +128,19 @@ def new( # List of files to render files = { + ".gitignore": {"template": "app/.gitignore.tpl"}, "README.md": {"template": "app/README.md.tpl"}, - "flower.toml": {"template": "app/flower.toml.tpl"}, "pyproject.toml": {"template": f"app/pyproject.{framework_str}.toml.tpl"}, f"{pnl}/__init__.py": {"template": "app/code/__init__.py.tpl"}, f"{pnl}/server.py": {"template": f"app/code/server.{framework_str}.py.tpl"}, f"{pnl}/client.py": {"template": f"app/code/client.{framework_str}.py.tpl"}, } - # In case framework is MlFramework.PYTORCH generate additionally the task.py file - if framework_str == MlFramework.PYTORCH.value.lower(): + # Depending on the framework, generate task.py file + frameworks_with_tasks = [ + MlFramework.PYTORCH.value.lower(), + ] + if framework_str in frameworks_with_tasks: files[f"{pnl}/task.py"] = {"template": f"app/code/task.{framework_str}.py.tpl"} context = {"project_name": project_name} diff --git a/src/py/flwr/cli/new/new_test.py b/src/py/flwr/cli/new/new_test.py index 11620d234191..e7d5ba3b9922 100644 --- a/src/py/flwr/cli/new/new_test.py +++ b/src/py/flwr/cli/new/new_test.py @@ -68,8 +68,8 @@ def test_new(tmp_path: str) -> None: expected_files_top_level = { "fedgpt", "README.md", - "flower.toml", "pyproject.toml", + ".gitignore", } expected_files_module = { "__init__.py", diff --git a/src/py/flwr/cli/new/templates/app/.gitignore.tpl b/src/py/flwr/cli/new/templates/app/.gitignore.tpl new file mode 100644 index 000000000000..68bc17f9ff21 --- /dev/null +++ b/src/py/flwr/cli/new/templates/app/.gitignore.tpl @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl b/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl index cc00f8ff0b8c..4a9448848313 100644 --- a/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +++ b/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl @@ -1 +1,57 @@ """$project_name: A Flower / TensorFlow app.""" + +import os + +import tensorflow as tf +from flwr.client import ClientApp, NumPyClient +from flwr_datasets import FederatedDataset + + +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" + +# Define Flower client +class FlowerClient(NumPyClient): + def __init__(self, model, train_data, test_data): + self.model = model + self.x_train, self.y_train = train_data + self.x_test, self.y_test = test_data + + def get_parameters(self, config): + return self.model.get_weights() + + def fit(self, parameters, config): + self.model.set_weights(parameters) + self.model.fit(self.x_train, self.y_train, epochs=1, batch_size=32, verbose=0) + return self.model.get_weights(), len(self.x_train), {} + + def evaluate(self, parameters, config): + self.model.set_weights(parameters) + loss, accuracy = self.model.evaluate(self.x_test, self.y_test, verbose=0) + return loss, len(self.x_test), {"accuracy": accuracy} + + +fds = FederatedDataset(dataset="cifar10", partitioners={"train": 2}) + +def client_fn(cid: str): + """Create and return an instance of Flower `Client`.""" + + # Load model and data (MobileNetV2, CIFAR-10) + model = tf.keras.applications.MobileNetV2((32, 32, 3), classes=10, weights=None) + model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"]) + + # Download and partition dataset + partition = fds.load_partition(int(cid), "train") + partition.set_format("numpy") + + # Divide data on each node: 80% train, 20% test + partition = partition.train_test_split(test_size=0.2, seed=42) + train_data = partition["train"]["img"] / 255.0, partition["train"]["label"] + test_data = partition["test"]["img"] / 255.0, partition["test"]["label"] + + return FlowerClient(model, train_data, test_data).to_client() + + +# Flower ClientApp +app = ClientApp( + client_fn=client_fn, +) diff --git a/src/py/flwr/cli/new/templates/app/code/server.tensorflow.py.tpl b/src/py/flwr/cli/new/templates/app/code/server.tensorflow.py.tpl index cc00f8ff0b8c..98cea74f0c92 100644 --- a/src/py/flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +++ b/src/py/flwr/cli/new/templates/app/code/server.tensorflow.py.tpl @@ -1 +1,19 @@ """$project_name: A Flower / TensorFlow app.""" + +from flwr.server import ServerApp, ServerConfig +from flwr.server.strategy import FedAvg + +# Define config +config = ServerConfig(num_rounds=3) + +strategy = FedAvg( + fraction_fit=1.0, + fraction_evaluate=1.0, + min_available_clients=2, +) + +# Flower ServerApp +app = ServerApp( + config=config, + strategy=strategy, +) diff --git a/src/py/flwr/cli/new/templates/app/flower.toml.tpl b/src/py/flwr/cli/new/templates/app/flower.toml.tpl deleted file mode 100644 index 07a6ffaf9e49..000000000000 --- a/src/py/flwr/cli/new/templates/app/flower.toml.tpl +++ /dev/null @@ -1,13 +0,0 @@ -[project] -name = "$project_name" -version = "1.0.0" -description = "" -license = "Apache-2.0" -authors = [ - "The Flower Authors ", -] -readme = "README.md" - -[flower.components] -serverapp = "$project_name.server:app" -clientapp = "$project_name.client:app" diff --git a/src/py/flwr/cli/new/templates/app/pyproject.numpy.toml.tpl b/src/py/flwr/cli/new/templates/app/pyproject.numpy.toml.tpl index 9701c62af6f0..47e5dda2a4a4 100644 --- a/src/py/flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +++ b/src/py/flwr/cli/new/templates/app/pyproject.numpy.toml.tpl @@ -17,3 +17,7 @@ dependencies = [ [tool.hatch.build.targets.wheel] packages = ["."] + +[flower.components] +serverapp = "$project_name.server:app" +clientapp = "$project_name.client:app" diff --git a/src/py/flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl b/src/py/flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl index 0661c7b730c1..dec132890e81 100644 --- a/src/py/flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +++ b/src/py/flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl @@ -19,3 +19,7 @@ dependencies = [ [tool.hatch.build.targets.wheel] packages = ["."] + +[flower.components] +serverapp = "$project_name.server:app" +clientapp = "$project_name.client:app" diff --git a/src/py/flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl b/src/py/flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl index 5a017eb6ed74..2afe9a86e2c5 100644 --- a/src/py/flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +++ b/src/py/flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl @@ -18,3 +18,7 @@ dependencies = [ [tool.hatch.build.targets.wheel] packages = ["."] + +[flower.components] +serverapp = "$project_name.server:app" +clientapp = "$project_name.client:app" diff --git a/src/py/flwr/cli/run/run.py b/src/py/flwr/cli/run/run.py index 98b5da1843a6..4cd5124e1668 100644 --- a/src/py/flwr/cli/run/run.py +++ b/src/py/flwr/cli/run/run.py @@ -18,7 +18,7 @@ import typer -from flwr.cli import flower_toml +from flwr.cli import config_utils from flwr.simulation.run_simulation import _run_simulation @@ -26,7 +26,7 @@ def run() -> None: """Run Flower project.""" typer.secho("Loading project configuration... ", fg=typer.colors.BLUE) - config, errors, warnings = flower_toml.load_and_validate_with_defaults() + config, errors, warnings = config_utils.load_and_validate_with_defaults() if config is None: typer.secho( diff --git a/src/py/flwr/client/__init__.py b/src/py/flwr/client/__init__.py index a721fb584164..a7f6cf3288a7 100644 --- a/src/py/flwr/client/__init__.py +++ b/src/py/flwr/client/__init__.py @@ -21,6 +21,7 @@ from .client import Client as Client from .client_app import ClientApp as ClientApp from .numpy_client import NumPyClient as NumPyClient +from .supernode import run_supernode as run_supernode from .typing import ClientFn as ClientFn __all__ = [ @@ -29,6 +30,7 @@ "ClientFn", "NumPyClient", "run_client_app", + "run_supernode", "start_client", "start_numpy_client", ] diff --git a/src/py/flwr/client/app.py b/src/py/flwr/client/app.py index 4fa9c80c6cdf..0e417bf10708 100644 --- a/src/py/flwr/client/app.py +++ b/src/py/flwr/client/app.py @@ -47,14 +47,15 @@ from .message_handler.message_handler import handle_control_message from .node_state import NodeState from .numpy_client import NumPyClient +from .supernode.app import parse_args_run_client_app def run_client_app() -> None: """Run Flower client app.""" - event(EventType.RUN_CLIENT_APP_ENTER) - log(INFO, "Long-running Flower client starting") + event(EventType.RUN_CLIENT_APP_ENTER) + args = _parse_args_run_client_app().parse_args() # Obtain certificates @@ -131,56 +132,7 @@ def _parse_args_run_client_app() -> argparse.ArgumentParser: description="Start a Flower client app", ) - parser.add_argument( - "client-app", - help="For example: `client:app` or `project.package.module:wrapper.app`", - ) - parser.add_argument( - "--insecure", - action="store_true", - help="Run the client without HTTPS. By default, the client runs with " - "HTTPS enabled. Use this flag only if you understand the risks.", - ) - parser.add_argument( - "--rest", - action="store_true", - help="Use REST as a transport layer for the client.", - ) - parser.add_argument( - "--root-certificates", - metavar="ROOT_CERT", - type=str, - help="Specifies the path to the PEM-encoded root certificate file for " - "establishing secure HTTPS connections.", - ) - parser.add_argument( - "--server", - default="0.0.0.0:9092", - help="Server address", - ) - parser.add_argument( - "--max-retries", - type=int, - default=None, - help="The maximum number of times the client will try to connect to the" - "server before giving up in case of a connection error. By default," - "it is set to None, meaning there is no limit to the number of tries.", - ) - parser.add_argument( - "--max-wait-time", - type=float, - default=None, - help="The maximum duration before the client stops trying to" - "connect to the server in case of connection error. By default, it" - "is set to None, meaning there is no limit to the total time.", - ) - parser.add_argument( - "--dir", - default="", - help="Add specified directory to the PYTHONPATH and load Flower " - "app from there." - " Default: current working directory.", - ) + parse_args_run_client_app(parser=parser) return parser diff --git a/src/py/flwr/client/rest_client/connection.py b/src/py/flwr/client/rest_client/connection.py index 0e6a7ef554e6..37e798e7cc3c 100644 --- a/src/py/flwr/client/rest_client/connection.py +++ b/src/py/flwr/client/rest_client/connection.py @@ -173,14 +173,14 @@ def ping() -> None: log( WARN, "[Node] POST /%s: missing header `Content-Type`", - PATH_PULL_TASK_INS, + PATH_PING, ) return if res.headers["content-type"] != "application/protobuf": log( WARN, "[Node] POST /%s: header `Content-Type` has wrong value", - PATH_PULL_TASK_INS, + PATH_PING, ) return @@ -223,14 +223,14 @@ def create_node() -> None: log( WARN, "[Node] POST /%s: missing header `Content-Type`", - PATH_PULL_TASK_INS, + PATH_CREATE_NODE, ) return if res.headers["content-type"] != "application/protobuf": log( WARN, "[Node] POST /%s: header `Content-Type` has wrong value", - PATH_PULL_TASK_INS, + PATH_CREATE_NODE, ) return @@ -277,14 +277,14 @@ def delete_node() -> None: log( WARN, "[Node] POST /%s: missing header `Content-Type`", - PATH_PULL_TASK_INS, + PATH_DELETE_NODE, ) return if res.headers["content-type"] != "application/protobuf": log( WARN, "[Node] POST /%s: header `Content-Type` has wrong value", - PATH_PULL_TASK_INS, + PATH_DELETE_NODE, ) # Cleanup diff --git a/src/py/flwr/client/supernode/__init__.py b/src/py/flwr/client/supernode/__init__.py new file mode 100644 index 000000000000..efd95c4ca6ba --- /dev/null +++ b/src/py/flwr/client/supernode/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2024 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flower SuperNode.""" + + +from .app import run_supernode as run_supernode + +__all__ = [ + "run_supernode", +] diff --git a/src/py/flwr/client/supernode/app.py b/src/py/flwr/client/supernode/app.py new file mode 100644 index 000000000000..35c71994fe4d --- /dev/null +++ b/src/py/flwr/client/supernode/app.py @@ -0,0 +1,107 @@ +# Copyright 2024 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flower SuperNode.""" + +import argparse +from logging import DEBUG, INFO + +from flwr.common import EventType, event +from flwr.common.exit_handlers import register_exit_handlers +from flwr.common.logger import log + + +def run_supernode() -> None: + """Run Flower SuperNode.""" + log(INFO, "Starting Flower SuperNode") + + event(EventType.RUN_SUPERNODE_ENTER) + + args = _parse_args_run_supernode().parse_args() + + log( + DEBUG, + "Flower will load ClientApp `%s`", + getattr(args, "client-app"), + ) + + # Graceful shutdown + register_exit_handlers( + event_type=EventType.RUN_SUPERNODE_LEAVE, + ) + + +def _parse_args_run_supernode() -> argparse.ArgumentParser: + """Parse flower-supernode command line arguments.""" + parser = argparse.ArgumentParser( + description="Start a Flower SuperNode", + ) + + parse_args_run_client_app(parser=parser) + + return parser + + +def parse_args_run_client_app(parser: argparse.ArgumentParser) -> None: + """Parse command line arguments.""" + parser.add_argument( + "client-app", + help="For example: `client:app` or `project.package.module:wrapper.app`", + ) + parser.add_argument( + "--insecure", + action="store_true", + help="Run the client without HTTPS. By default, the client runs with " + "HTTPS enabled. Use this flag only if you understand the risks.", + ) + parser.add_argument( + "--rest", + action="store_true", + help="Use REST as a transport layer for the client.", + ) + parser.add_argument( + "--root-certificates", + metavar="ROOT_CERT", + type=str, + help="Specifies the path to the PEM-encoded root certificate file for " + "establishing secure HTTPS connections.", + ) + parser.add_argument( + "--server", + default="0.0.0.0:9092", + help="Server address", + ) + parser.add_argument( + "--max-retries", + type=int, + default=None, + help="The maximum number of times the client will try to connect to the" + "server before giving up in case of a connection error. By default," + "it is set to None, meaning there is no limit to the number of tries.", + ) + parser.add_argument( + "--max-wait-time", + type=float, + default=None, + help="The maximum duration before the client stops trying to" + "connect to the server in case of connection error. By default, it" + "is set to None, meaning there is no limit to the total time.", + ) + parser.add_argument( + "--dir", + default="", + help="Add specified directory to the PYTHONPATH and load Flower " + "app from there." + " Default: current working directory.", + ) diff --git a/src/py/flwr/common/record/recordset.py b/src/py/flwr/common/record/recordset.py index d8ef44ab15c2..212cbbc8e6e8 100644 --- a/src/py/flwr/common/record/recordset.py +++ b/src/py/flwr/common/record/recordset.py @@ -16,23 +16,20 @@ from dataclasses import dataclass -from typing import Callable, Dict, Optional, Type, TypeVar +from typing import Dict, Optional, cast from .configsrecord import ConfigsRecord from .metricsrecord import MetricsRecord from .parametersrecord import ParametersRecord from .typeddict import TypedDict -T = TypeVar("T") +class RecordSetData: + """Inner data container for the RecordSet class.""" -@dataclass -class RecordSet: - """RecordSet stores groups of parameters, metrics and configs.""" - - _parameters_records: TypedDict[str, ParametersRecord] - _metrics_records: TypedDict[str, MetricsRecord] - _configs_records: TypedDict[str, ConfigsRecord] + parameters_records: TypedDict[str, ParametersRecord] + metrics_records: TypedDict[str, MetricsRecord] + configs_records: TypedDict[str, ConfigsRecord] def __init__( self, @@ -40,40 +37,82 @@ def __init__( metrics_records: Optional[Dict[str, MetricsRecord]] = None, configs_records: Optional[Dict[str, ConfigsRecord]] = None, ) -> None: - def _get_check_fn(__t: Type[T]) -> Callable[[T], None]: - def _check_fn(__v: T) -> None: - if not isinstance(__v, __t): - raise TypeError(f"Expected `{__t}`, but `{type(__v)}` was passed.") - - return _check_fn - - self._parameters_records = TypedDict[str, ParametersRecord]( - _get_check_fn(str), _get_check_fn(ParametersRecord) + self.parameters_records = TypedDict[str, ParametersRecord]( + self._check_fn_str, self._check_fn_params ) - self._metrics_records = TypedDict[str, MetricsRecord]( - _get_check_fn(str), _get_check_fn(MetricsRecord) + self.metrics_records = TypedDict[str, MetricsRecord]( + self._check_fn_str, self._check_fn_metrics ) - self._configs_records = TypedDict[str, ConfigsRecord]( - _get_check_fn(str), _get_check_fn(ConfigsRecord) + self.configs_records = TypedDict[str, ConfigsRecord]( + self._check_fn_str, self._check_fn_configs ) if parameters_records is not None: - self._parameters_records.update(parameters_records) + self.parameters_records.update(parameters_records) if metrics_records is not None: - self._metrics_records.update(metrics_records) + self.metrics_records.update(metrics_records) if configs_records is not None: - self._configs_records.update(configs_records) + self.configs_records.update(configs_records) + + def _check_fn_str(self, key: str) -> None: + if not isinstance(key, str): + raise TypeError( + f"Expected `{str.__name__}`, but " + f"received `{type(key).__name__}` for the key." + ) + + def _check_fn_params(self, record: ParametersRecord) -> None: + if not isinstance(record, ParametersRecord): + raise TypeError( + f"Expected `{ParametersRecord.__name__}`, but " + f"received `{type(record).__name__}` for the value." + ) + + def _check_fn_metrics(self, record: MetricsRecord) -> None: + if not isinstance(record, MetricsRecord): + raise TypeError( + f"Expected `{MetricsRecord.__name__}`, but " + f"received `{type(record).__name__}` for the value." + ) + + def _check_fn_configs(self, record: ConfigsRecord) -> None: + if not isinstance(record, ConfigsRecord): + raise TypeError( + f"Expected `{ConfigsRecord.__name__}`, but " + f"received `{type(record).__name__}` for the value." + ) + + +@dataclass +class RecordSet: + """RecordSet stores groups of parameters, metrics and configs.""" + + def __init__( + self, + parameters_records: Optional[Dict[str, ParametersRecord]] = None, + metrics_records: Optional[Dict[str, MetricsRecord]] = None, + configs_records: Optional[Dict[str, ConfigsRecord]] = None, + ) -> None: + data = RecordSetData( + parameters_records=parameters_records, + metrics_records=metrics_records, + configs_records=configs_records, + ) + setattr(self, "_data", data) # noqa @property def parameters_records(self) -> TypedDict[str, ParametersRecord]: """Dictionary holding ParametersRecord instances.""" - return self._parameters_records + data = cast(RecordSetData, getattr(self, "_data")) # noqa + return data.parameters_records @property def metrics_records(self) -> TypedDict[str, MetricsRecord]: """Dictionary holding MetricsRecord instances.""" - return self._metrics_records + data = cast(RecordSetData, getattr(self, "_data")) # noqa + return data.metrics_records @property def configs_records(self) -> TypedDict[str, ConfigsRecord]: """Dictionary holding ConfigsRecord instances.""" - return self._configs_records + data = cast(RecordSetData, getattr(self, "_data")) # noqa + return data.configs_records diff --git a/src/py/flwr/common/record/recordset_test.py b/src/py/flwr/common/record/recordset_test.py index 0e0b149881be..94d087795841 100644 --- a/src/py/flwr/common/record/recordset_test.py +++ b/src/py/flwr/common/record/recordset_test.py @@ -14,6 +14,7 @@ # ============================================================================== """RecordSet tests.""" +import pickle from copy import deepcopy from typing import Callable, Dict, List, OrderedDict, Type, Union @@ -33,7 +34,7 @@ Parameters, ) -from . import Array, ConfigsRecord, MetricsRecord, ParametersRecord +from . import Array, ConfigsRecord, MetricsRecord, ParametersRecord, RecordSet def get_ndarrays() -> NDArrays: @@ -398,3 +399,18 @@ def test_count_bytes_configsrecord() -> None: record_bytest_count = c_record.count_bytes() assert bytes_in_dict == record_bytest_count + + +def test_record_is_picklable() -> None: + """Test if RecordSet and *Record are picklable.""" + # Prepare + p_record = ParametersRecord() + m_record = MetricsRecord({"aa": 123}) + c_record = ConfigsRecord({"cc": bytes(9)}) + rs = RecordSet() + rs.parameters_records["params"] = p_record + rs.metrics_records["metrics"] = m_record + rs.configs_records["configs"] = c_record + + # Execute + pickle.dumps((p_record, m_record, c_record, rs)) diff --git a/src/py/flwr/common/telemetry.py b/src/py/flwr/common/telemetry.py index 8eb594085d31..41fe1508e652 100644 --- a/src/py/flwr/common/telemetry.py +++ b/src/py/flwr/common/telemetry.py @@ -160,6 +160,10 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A RUN_SERVER_APP_ENTER = auto() RUN_SERVER_APP_LEAVE = auto() + # SuperNode + RUN_SUPERNODE_ENTER = auto() + RUN_SUPERNODE_LEAVE = auto() + # Use the ThreadPoolExecutor with max_workers=1 to have a queue # and also ensure that telemetry calls are not blocking. diff --git a/src/py/flwr/server/app.py b/src/py/flwr/server/app.py index e04cfb37e118..70e53da765da 100644 --- a/src/py/flwr/server/app.py +++ b/src/py/flwr/server/app.py @@ -291,9 +291,11 @@ def run_fleet_api() -> None: # pylint: disable=too-many-branches, too-many-locals, too-many-statements def run_superlink() -> None: - """Run Flower server (Driver API and Fleet API).""" - log(INFO, "Starting Flower server") + """Run Flower SuperLink (Driver API and Fleet API).""" + log(INFO, "Starting Flower SuperLink") + event(EventType.RUN_SUPERLINK_ENTER) + args = _parse_args_run_superlink().parse_args() # Parse IP address @@ -568,9 +570,7 @@ def _parse_args_run_fleet_api() -> argparse.ArgumentParser: def _parse_args_run_superlink() -> argparse.ArgumentParser: """Parse command line arguments for both Driver API and Fleet API.""" parser = argparse.ArgumentParser( - description="This will start a Flower server " - "(meaning, a Driver API and a Fleet API), " - "that clients will be able to connect to.", + description="Start a Flower SuperLink", ) _add_args_common(parser=parser) diff --git a/src/py/flwr/server/driver/abc_driver.py b/src/py/flwr/server/driver/abc_driver.py new file mode 100644 index 000000000000..b95cec95ab47 --- /dev/null +++ b/src/py/flwr/server/driver/abc_driver.py @@ -0,0 +1,140 @@ +# Copyright 2024 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Driver (abstract base class).""" + + +from abc import ABC, abstractmethod +from typing import Iterable, List, Optional + +from flwr.common import Message, RecordSet + + +class Driver(ABC): + """Abstract base Driver class for the Driver API.""" + + @abstractmethod + def create_message( # pylint: disable=too-many-arguments + self, + content: RecordSet, + message_type: str, + dst_node_id: int, + group_id: str, + ttl: Optional[float] = None, + ) -> Message: + """Create a new message with specified parameters. + + This method constructs a new `Message` with given content and metadata. + The `run_id` and `src_node_id` will be set automatically. + + Parameters + ---------- + content : RecordSet + The content for the new message. This holds records that are to be sent + to the destination node. + message_type : str + The type of the message, defining the action to be executed on + the receiving end. + dst_node_id : int + The ID of the destination node to which the message is being sent. + group_id : str + The ID of the group to which this message is associated. In some settings, + this is used as the FL round. + ttl : Optional[float] (default: None) + Time-to-live for the round trip of this message, i.e., the time from sending + this message to receiving a reply. It specifies in seconds the duration for + which the message and its potential reply are considered valid. If unset, + the default TTL (i.e., `common.DEFAULT_TTL`) will be used. + + Returns + ------- + message : Message + A new `Message` instance with the specified content and metadata. + """ + + @abstractmethod + def get_node_ids(self) -> List[int]: + """Get node IDs.""" + + @abstractmethod + def push_messages(self, messages: Iterable[Message]) -> Iterable[str]: + """Push messages to specified node IDs. + + This method takes an iterable of messages and sends each message + to the node specified in `dst_node_id`. + + Parameters + ---------- + messages : Iterable[Message] + An iterable of messages to be sent. + + Returns + ------- + message_ids : Iterable[str] + An iterable of IDs for the messages that were sent, which can be used + to pull replies. + """ + + @abstractmethod + def pull_messages(self, message_ids: Iterable[str]) -> Iterable[Message]: + """Pull messages based on message IDs. + + This method is used to collect messages from the SuperLink + that correspond to a set of given message IDs. + + Parameters + ---------- + message_ids : Iterable[str] + An iterable of message IDs for which reply messages are to be retrieved. + + Returns + ------- + messages : Iterable[Message] + An iterable of messages received. + """ + + @abstractmethod + def send_and_receive( + self, + messages: Iterable[Message], + *, + timeout: Optional[float] = None, + ) -> Iterable[Message]: + """Push messages to specified node IDs and pull the reply messages. + + This method sends a list of messages to their destination node IDs and then + waits for the replies. It continues to pull replies until either all + replies are received or the specified timeout duration is exceeded. + + Parameters + ---------- + messages : Iterable[Message] + An iterable of messages to be sent. + timeout : Optional[float] (default: None) + The timeout duration in seconds. If specified, the method will wait for + replies for this duration. If `None`, there is no time limit and the method + will wait until replies for all messages are received. + + Returns + ------- + replies : Iterable[Message] + An iterable of reply messages received from the SuperLink. + + Notes + ----- + This method uses `push_messages` to send the messages and `pull_messages` + to collect the replies. If `timeout` is set, the method may not return + replies for all sent messages. A message remains valid until its TTL, + which is not affected by `timeout`. + """ diff --git a/src/py/flwr/server/superlink/driver/driver_servicer.py b/src/py/flwr/server/superlink/driver/driver_servicer.py index c5e8d055b708..9c76bb63681b 100644 --- a/src/py/flwr/server/superlink/driver/driver_servicer.py +++ b/src/py/flwr/server/superlink/driver/driver_servicer.py @@ -64,7 +64,7 @@ def CreateRun( """Create run ID.""" log(INFO, "DriverServicer.CreateRun") state: State = self.state_factory.state() - run_id = state.create_run() + run_id = state.create_run("None/None", "None") return CreateRunResponse(run_id=run_id) def PushTaskIns( diff --git a/src/py/flwr/server/superlink/fleet/vce/vce_api_test.py b/src/py/flwr/server/superlink/fleet/vce/vce_api_test.py index 66c3c21326d5..1da726f88f1e 100644 --- a/src/py/flwr/server/superlink/fleet/vce/vce_api_test.py +++ b/src/py/flwr/server/superlink/fleet/vce/vce_api_test.py @@ -81,7 +81,7 @@ def register_messages_into_state( ) -> Dict[UUID, float]: """Register `num_messages` into the state factory.""" state: InMemoryState = state_factory.state() # type: ignore - state.run_ids.add(run_id) + state.run_ids[run_id] = ("Mock/mock", "v1.0.0") # Artificially add TaskIns to state so they can be processed # by the Simulation Engine logic nodes_cycle = cycle(nodes_mapping.keys()) # we have more messages than supernodes diff --git a/src/py/flwr/server/superlink/state/in_memory_state.py b/src/py/flwr/server/superlink/state/in_memory_state.py index 3ac65d6bf922..f6c84b0dc85a 100644 --- a/src/py/flwr/server/superlink/state/in_memory_state.py +++ b/src/py/flwr/server/superlink/state/in_memory_state.py @@ -36,7 +36,8 @@ class InMemoryState(State): def __init__(self) -> None: # Map node_id to (online_until, ping_interval) self.node_ids: Dict[int, Tuple[float, float]] = {} - self.run_ids: Set[int] = set() + # Map run_id to (fab_id, fab_version) + self.run_ids: Dict[int, Tuple[str, str]] = {} self.task_ins_store: Dict[UUID, TaskIns] = {} self.task_res_store: Dict[UUID, TaskRes] = {} self.lock = threading.Lock() @@ -238,18 +239,26 @@ def get_nodes(self, run_id: int) -> Set[int]: if online_until > current_time } - def create_run(self) -> int: - """Create one run.""" + def create_run(self, fab_id: str, fab_version: str) -> int: + """Create a new run for the specified `fab_id` and `fab_version`.""" # Sample a random int64 as run_id with self.lock: run_id: int = int.from_bytes(os.urandom(8), "little", signed=True) if run_id not in self.run_ids: - self.run_ids.add(run_id) + self.run_ids[run_id] = (fab_id, fab_version) return run_id log(ERROR, "Unexpected run creation failure.") return 0 + def get_run(self, run_id: int) -> Tuple[int, str, str]: + """Retrieve information about the run with the specified `run_id`.""" + with self.lock: + if run_id not in self.run_ids: + log(ERROR, "`run_id` is invalid") + return 0, "", "" + return run_id, *self.run_ids[run_id] + def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool: """Acknowledge a ping received from a node, serving as a heartbeat.""" with self.lock: diff --git a/src/py/flwr/server/superlink/state/sqlite_state.py b/src/py/flwr/server/superlink/state/sqlite_state.py index a40dbde16aaf..7fb7ae42653a 100644 --- a/src/py/flwr/server/superlink/state/sqlite_state.py +++ b/src/py/flwr/server/superlink/state/sqlite_state.py @@ -46,7 +46,9 @@ SQL_CREATE_TABLE_RUN = """ CREATE TABLE IF NOT EXISTS run( - run_id INTEGER UNIQUE + run_id INTEGER UNIQUE, + fab_id TEXT, + fab_version TEXT ); """ @@ -558,8 +560,8 @@ def get_nodes(self, run_id: int) -> Set[int]: result: Set[int] = {row["node_id"] for row in rows} return result - def create_run(self) -> int: - """Create one run and store it in state.""" + def create_run(self, fab_id: str, fab_version: str) -> int: + """Create a new run for the specified `fab_id` and `fab_version`.""" # Sample a random int64 as run_id run_id: int = int.from_bytes(os.urandom(8), "little", signed=True) @@ -567,12 +569,22 @@ def create_run(self) -> int: query = "SELECT COUNT(*) FROM run WHERE run_id = ?;" # If run_id does not exist if self.query(query, (run_id,))[0]["COUNT(*)"] == 0: - query = "INSERT INTO run VALUES(:run_id);" - self.query(query, {"run_id": run_id}) + query = "INSERT INTO run (run_id, fab_id, fab_version) VALUES (?, ?, ?);" + self.query(query, (run_id, fab_id, fab_version)) return run_id log(ERROR, "Unexpected run creation failure.") return 0 + def get_run(self, run_id: int) -> Tuple[int, str, str]: + """Retrieve information about the run with the specified `run_id`.""" + query = "SELECT * FROM run WHERE run_id = ?;" + try: + row = self.query(query, (run_id,))[0] + return run_id, row["fab_id"], row["fab_version"] + except sqlite3.IntegrityError: + log(ERROR, "`run_id` does not exist.") + return 0, "", "" + def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool: """Acknowledge a ping received from a node, serving as a heartbeat.""" # Update `online_until` and `ping_interval` for the given `node_id` diff --git a/src/py/flwr/server/superlink/state/state.py b/src/py/flwr/server/superlink/state/state.py index b356cd47befa..8b087e3d644a 100644 --- a/src/py/flwr/server/superlink/state/state.py +++ b/src/py/flwr/server/superlink/state/state.py @@ -16,7 +16,7 @@ import abc -from typing import List, Optional, Set +from typing import List, Optional, Set, Tuple from uuid import UUID from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611 @@ -150,8 +150,26 @@ def get_nodes(self, run_id: int) -> Set[int]: """ @abc.abstractmethod - def create_run(self) -> int: - """Create one run.""" + def create_run(self, fab_id: str, fab_version: str) -> int: + """Create a new run for the specified `fab_id` and `fab_version`.""" + + @abc.abstractmethod + def get_run(self, run_id: int) -> Tuple[int, str, str]: + """Retrieve information about the run with the specified `run_id`. + + Parameters + ---------- + run_id : int + The identifier of the run. + + Returns + ------- + Tuple[int, str, str] + A tuple containing three elements: + - `run_id`: The identifier of the run, same as the specified `run_id`. + - `fab_id`: The identifier of the FAB used in the specified run. + - `fab_version`: The version of the FAB used in the specified run. + """ @abc.abstractmethod def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool: diff --git a/src/py/flwr/server/superlink/state/state_test.py b/src/py/flwr/server/superlink/state/state_test.py index f48f498aed06..281707e16be0 100644 --- a/src/py/flwr/server/superlink/state/state_test.py +++ b/src/py/flwr/server/superlink/state/state_test.py @@ -43,6 +43,20 @@ def state_factory(self) -> State: """Provide state implementation to test.""" raise NotImplementedError() + def test_create_and_get_run(self) -> None: + """Test if create_run and get_run work correctly.""" + # Prepare + state: State = self.state_factory() + run_id = state.create_run("Mock/mock", "v1.0.0") + + # Execute + actual_run_id, fab_id, fab_version = state.get_run(run_id) + + # Assert + assert actual_run_id == run_id + assert fab_id == "Mock/mock" + assert fab_version == "v1.0.0" + def test_get_task_ins_empty(self) -> None: """Validate that a new state has no TaskIns.""" # Prepare @@ -70,7 +84,7 @@ def test_store_task_ins_one(self) -> None: # Prepare consumer_node_id = 1 state = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins = create_task_ins( consumer_node_id=consumer_node_id, anonymous=False, run_id=run_id ) @@ -105,7 +119,7 @@ def test_store_and_delete_tasks(self) -> None: # Prepare consumer_node_id = 1 state = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins_0 = create_task_ins( consumer_node_id=consumer_node_id, anonymous=False, run_id=run_id ) @@ -179,7 +193,7 @@ def test_task_ins_store_anonymous_and_retrieve_anonymous(self) -> None: """ # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins = create_task_ins(consumer_node_id=0, anonymous=True, run_id=run_id) # Execute @@ -194,7 +208,7 @@ def test_task_ins_store_anonymous_and_fail_retrieving_identitiy(self) -> None: """Store anonymous TaskIns and fail to retrieve it.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins = create_task_ins(consumer_node_id=0, anonymous=True, run_id=run_id) # Execute @@ -208,7 +222,7 @@ def test_task_ins_store_identity_and_fail_retrieving_anonymous(self) -> None: """Store identity TaskIns and fail retrieving it as anonymous.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins = create_task_ins(consumer_node_id=1, anonymous=False, run_id=run_id) # Execute @@ -222,7 +236,7 @@ def test_task_ins_store_identity_and_retrieve_identity(self) -> None: """Store identity TaskIns and retrieve it.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins = create_task_ins(consumer_node_id=1, anonymous=False, run_id=run_id) # Execute @@ -239,7 +253,7 @@ def test_task_ins_store_delivered_and_fail_retrieving(self) -> None: """Fail retrieving delivered task.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins = create_task_ins(consumer_node_id=1, anonymous=False, run_id=run_id) # Execute @@ -282,7 +296,7 @@ def test_task_res_store_and_retrieve_by_task_ins_id(self) -> None: """Store TaskRes retrieve it by task_ins_id.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_ins_id = uuid4() task_res = create_task_res( producer_node_id=0, @@ -303,7 +317,7 @@ def test_node_ids_initial_state(self) -> None: """Test retrieving all node_ids and empty initial state.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") # Execute retrieved_node_ids = state.get_nodes(run_id) @@ -315,7 +329,7 @@ def test_create_node_and_get_nodes(self) -> None: """Test creating a client node.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") node_ids = [] # Execute @@ -331,7 +345,7 @@ def test_delete_node(self) -> None: """Test deleting a client node.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") node_id = state.create_node(ping_interval=10) # Execute @@ -345,7 +359,7 @@ def test_get_nodes_invalid_run_id(self) -> None: """Test retrieving all node_ids with invalid run_id.""" # Prepare state: State = self.state_factory() - state.create_run() + state.create_run("mock/mock", "v1.0.0") invalid_run_id = 61016 state.create_node(ping_interval=10) @@ -359,7 +373,7 @@ def test_num_task_ins(self) -> None: """Test if num_tasks returns correct number of not delivered task_ins.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_0 = create_task_ins(consumer_node_id=0, anonymous=True, run_id=run_id) task_1 = create_task_ins(consumer_node_id=0, anonymous=True, run_id=run_id) @@ -377,7 +391,7 @@ def test_num_task_res(self) -> None: """Test if num_tasks returns correct number of not delivered task_res.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") task_0 = create_task_res( producer_node_id=0, anonymous=True, ancestry=["1"], run_id=run_id ) @@ -399,7 +413,7 @@ def test_acknowledge_ping(self) -> None: """Test if acknowledge_ping works and if get_nodes return online nodes.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") node_ids = [state.create_node(ping_interval=10) for _ in range(100)] for node_id in node_ids[:70]: state.acknowledge_ping(node_id, ping_interval=30) @@ -418,7 +432,7 @@ def test_node_unavailable_error(self) -> None: """Test if get_task_res return TaskRes containing node unavailable error.""" # Prepare state: State = self.state_factory() - run_id = state.create_run() + run_id = state.create_run("mock/mock", "v1.0.0") node_id_0 = state.create_node(ping_interval=90) node_id_1 = state.create_node(ping_interval=30) # Create and store TaskIns