From c078d3a02ead14d5db69b74a0452001affe5dfb5 Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Tue, 19 Dec 2023 19:50:21 +0100 Subject: [PATCH 01/12] Fix sphinx warnings (#2721) --- ...sages.rst => contributor-how-to-create-new-messages.rst} | 0 src/py/flwr/simulation/app.py | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) rename doc/source/{contributor-how-create-new-messages.rst => contributor-how-to-create-new-messages.rst} (100%) diff --git a/doc/source/contributor-how-create-new-messages.rst b/doc/source/contributor-how-to-create-new-messages.rst similarity index 100% rename from doc/source/contributor-how-create-new-messages.rst rename to doc/source/contributor-how-to-create-new-messages.rst diff --git a/src/py/flwr/simulation/app.py b/src/py/flwr/simulation/app.py index 0bb9290b6911..c519f5a551f0 100644 --- a/src/py/flwr/simulation/app.py +++ b/src/py/flwr/simulation/app.py @@ -107,8 +107,8 @@ def start_simulation( List `client_id`s for each client. This is only required if `num_clients` is not set. Setting both `num_clients` and `clients_ids` with `len(clients_ids)` not equal to `num_clients` generates an error. - client_resources : Optional[Dict[str, float]] (default: `{"num_cpus": 1, - "num_gpus": 0.0}` CPU and GPU resources for a single client. Supported keys + client_resources : Optional[Dict[str, float]] (default: `{"num_cpus": 1, "num_gpus": 0.0}`) + CPU and GPU resources for a single client. Supported keys are `num_cpus` and `num_gpus`. To understand the GPU utilization caused by `num_gpus`, as well as using custom resources, please consult the Ray documentation. @@ -160,7 +160,7 @@ def start_simulation( ------- hist : flwr.server.history.History Object containing metrics from training. 
- """ + """ # noqa: E501 # pylint: disable-msg=too-many-locals event( EventType.START_SIMULATION_ENTER, From f6a10f99226f41ad7692ac011fa356faede878ee Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:58:15 +0100 Subject: [PATCH 02/12] Migrate TensorFlow quickstart to Flower Datasets (#2318) Co-authored-by: jafermarq --- examples/quickstart-tensorflow/README.md | 8 +++--- examples/quickstart-tensorflow/client.py | 26 +++++++++++++++++-- examples/quickstart-tensorflow/pyproject.toml | 1 + .../quickstart-tensorflow/requirements.txt | 1 + examples/quickstart-tensorflow/run.sh | 2 +- examples/quickstart-tensorflow/server.py | 17 ++++++++++++ 6 files changed, 48 insertions(+), 7 deletions(-) diff --git a/examples/quickstart-tensorflow/README.md b/examples/quickstart-tensorflow/README.md index 7ada48797d03..92d38c9340d7 100644 --- a/examples/quickstart-tensorflow/README.md +++ b/examples/quickstart-tensorflow/README.md @@ -1,7 +1,7 @@ # Flower Example using TensorFlow/Keras -This introductory example to Flower uses Keras but deep knowledge of Keras is not necessarily required to run the example. However, it will help you understanding how to adapt Flower to your use-cases. -Running this example in itself is quite easy. +This introductory example to Flower uses Keras but deep knowledge of Keras is not necessarily required to run the example. However, it will help you understand how to adapt Flower to your use case. +Running this example in itself is quite easy. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to download, partition and preprocess the CIFAR-10 dataset. ## Project Setup @@ -50,7 +50,7 @@ pip install -r requirements.txt ## Run Federated Learning with TensorFlow/Keras and Flower -Afterwards you are ready to start the Flower server as well as the clients. 
You can simply start the server in a terminal as follows: +Afterward, you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows: ```shell poetry run python3 server.py @@ -62,7 +62,7 @@ Now you are ready to start the Flower clients which will participate in the lear poetry run python3 client.py ``` -Alternatively you can run all of it in one shell as follows: +Alternatively, you can run all of it in one shell as follows: ```shell poetry run python3 server.py & diff --git a/examples/quickstart-tensorflow/client.py b/examples/quickstart-tensorflow/client.py index fc367e2c3053..d998adbdd899 100644 --- a/examples/quickstart-tensorflow/client.py +++ b/examples/quickstart-tensorflow/client.py @@ -1,16 +1,38 @@ +import argparse import os import flwr as fl import tensorflow as tf - +from flwr_datasets import FederatedDataset # Make TensorFlow log less verbose os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +# Parse arguments +parser = argparse.ArgumentParser(description="Flower") +parser.add_argument( + "--node-id", + type=int, + choices=[0, 1, 2], + required=True, + help="Partition of the dataset (0,1 or 2). 
" + "The dataset is divided into 3 partitions created artificially.", +) +args = parser.parse_args() + # Load model and data (MobileNetV2, CIFAR-10) model = tf.keras.applications.MobileNetV2((32, 32, 3), classes=10, weights=None) model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"]) -(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data() + +# Download and partition dataset +fds = FederatedDataset(dataset="cifar10", partitioners={"train": 3}) +partition = fds.load_partition(args.node_id, "train") +partition.set_format("numpy") + +# Divide data on each node: 80% train, 20% test +partition = partition.train_test_split(test_size=0.2) +x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"] +x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"] # Define Flower client diff --git a/examples/quickstart-tensorflow/pyproject.toml b/examples/quickstart-tensorflow/pyproject.toml index 68d4f9aada52..e027a7353181 100644 --- a/examples/quickstart-tensorflow/pyproject.toml +++ b/examples/quickstart-tensorflow/pyproject.toml @@ -11,5 +11,6 @@ authors = ["The Flower Authors "] [tool.poetry.dependencies] python = ">=3.8,<3.11" flwr = ">=1.0,<2.0" +flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" } tensorflow-cpu = {version = ">=2.9.1,<2.11.1 || >2.11.1", markers = "platform_machine == \"x86_64\""} tensorflow-macos = {version = ">=2.9.1,<2.11.1 || >2.11.1", markers = "sys_platform == \"darwin\" and platform_machine == \"arm64\""} diff --git a/examples/quickstart-tensorflow/requirements.txt b/examples/quickstart-tensorflow/requirements.txt index 6420aab25ec8..7f025975cae9 100644 --- a/examples/quickstart-tensorflow/requirements.txt +++ b/examples/quickstart-tensorflow/requirements.txt @@ -1,3 +1,4 @@ flwr>=1.0, <2.0 +flwr-datasets[vision]>=0.0.2, <1.0.0 tensorflow-macos>=2.9.1, != 2.11.1 ; sys_platform == "darwin" and platform_machine == "arm64" tensorflow-cpu>=2.9.1, != 
2.11.1 ; platform_machine == "x86_64" diff --git a/examples/quickstart-tensorflow/run.sh b/examples/quickstart-tensorflow/run.sh index c64f362086aa..439abea8df4b 100755 --- a/examples/quickstart-tensorflow/run.sh +++ b/examples/quickstart-tensorflow/run.sh @@ -6,7 +6,7 @@ sleep 3 # Sleep for 3s to give the server enough time to start for i in `seq 0 1`; do echo "Starting client $i" - python client.py & + python client.py --node-id $i & done # This will allow you to use CTRL+C to stop all background processes diff --git a/examples/quickstart-tensorflow/server.py b/examples/quickstart-tensorflow/server.py index 39c350388c1b..fe691a88aba0 100644 --- a/examples/quickstart-tensorflow/server.py +++ b/examples/quickstart-tensorflow/server.py @@ -1,8 +1,25 @@ +from typing import List, Tuple + import flwr as fl +from flwr.common import Metrics + + +# Define metric aggregation function +def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: + # Multiply accuracy of each client by number of examples used + accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics] + examples = [num_examples for num_examples, _ in metrics] + + # Aggregate and return custom metric (weighted average) + return {"accuracy": sum(accuracies) / sum(examples)} + +# Define strategy +strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=weighted_average) # Start Flower server fl.server.start_server( server_address="0.0.0.0:8080", config=fl.server.ServerConfig(num_rounds=3), + strategy=strategy, ) From 5c710567d775db98ac02b8026a323a65a0e2dd5f Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Wed, 20 Dec 2023 16:23:38 +0100 Subject: [PATCH 03/12] Pin update PR branch action to commit sha (#2731) --- .github/workflows/update-pr.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update-pr.yml b/.github/workflows/update-pr.yml index 78ef5bc86772..64b16aeabebf 100644 --- a/.github/workflows/update-pr.yml +++ 
b/.github/workflows/update-pr.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Automatically update mergeable PRs - uses: adRise/update-pr-branch@v0.7.0 + uses: adRise/update-pr-branch@cd305ecbd76bf63056c9400ce2c725293fc3e0c0 # v0.7.0 with: token: ${{ secrets.FLWRMACHINE_TOKEN }} base: 'main' From e8decbf0a51b4eecad5760b64c16a1eb46d9aac8 Mon Sep 17 00:00:00 2001 From: "Daniel J. Beutel" Date: Wed, 20 Dec 2023 16:45:35 +0100 Subject: [PATCH 04/12] Update CODEOWNERS (#2733) --- .github/CODEOWNERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a5eadadf8604..5191bc8625da 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,3 +5,6 @@ # Flower Baselines /baselines @jafermarq @tanertopal @danieljanes + +# Flower Examples +/examples @jafermarq @tanertopal @danieljanes From 41cc357415fefc09737955ce1707589cba638b72 Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Wed, 20 Dec 2023 16:54:06 +0100 Subject: [PATCH 05/12] Skip upload wheel step for dependabot (#2732) --- .github/workflows/e2e.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index a5121ad71b38..ed7535409fa4 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -30,12 +30,12 @@ jobs: - name: Test wheel run: ./dev/test-wheel.sh - name: Upload wheel - if: ${{ github.repository == 'adap/flower' && !github.event.pull_request.head.repo.fork }} + if: ${{ github.repository == 'adap/flower' && !github.event.pull_request.head.repo.fork && github.actor != 'dependabot[bot]' }} id: upload env: - AWS_DEFAULT_REGION: ${{ secrets. AWS_DEFAULT_REGION }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets. 
AWS_SECRET_ACCESS_KEY }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} run: | cd ./dist echo "WHL_PATH=$(ls *.whl)" >> "$GITHUB_OUTPUT" @@ -73,7 +73,7 @@ jobs: dataset: | import tensorflow as tf tf.keras.datasets.cifar10.load_data() - + - directory: tabnet dataset: | import tensorflow_datasets as tfds @@ -83,7 +83,7 @@ jobs: dataset: | from torchvision.datasets import CIFAR10 CIFAR10('./data', download=True) - + - directory: pytorch-lightning dataset: | from torchvision.datasets import MNIST @@ -102,7 +102,7 @@ jobs: - directory: fastai dataset: | from fastai.vision.all import untar_data, URLs - untar_data(URLs.MNIST) + untar_data(URLs.MNIST) - directory: pandas dataset: | From 1bf2ac3580426eeb9cd2f02d8da13935bc568a52 Mon Sep 17 00:00:00 2001 From: Pritam Neog Date: Wed, 20 Dec 2023 21:32:13 +0530 Subject: [PATCH 06/12] Make `node-id` required for quickstart-pytorch (#2712) Co-authored-by: jafermarq --- examples/quickstart-pytorch/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/quickstart-pytorch/client.py b/examples/quickstart-pytorch/client.py index ad57645002f8..1edb42d1ec81 100644 --- a/examples/quickstart-pytorch/client.py +++ b/examples/quickstart-pytorch/client.py @@ -99,6 +99,7 @@ def apply_transforms(batch): parser.add_argument( "--node-id", choices=[0, 1, 2], + required=True, type=int, help="Partition of the dataset divided into 3 iid partitions created artificially.", ) From 440b21dbc03e05c0469aadaa3cfe8ce43e5ce42d Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Wed, 20 Dec 2023 17:12:24 +0100 Subject: [PATCH 07/12] How to build docker images locally (#2701) --- .github/workflows/docker-server.yml | 8 +- ...contributor-how-to-build-docker-images.rst | 135 ++++++++++++++++++ doc/source/index.rst | 1 + src/docker/server/Dockerfile | 5 +- 4 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 doc/source/contributor-how-to-build-docker-images.rst diff --git a/.github/workflows/docker-server.yml 
b/.github/workflows/docker-server.yml index 8233b5206d49..f580a8e9a280 100644 --- a/.github/workflows/docker-server.yml +++ b/.github/workflows/docker-server.yml @@ -7,8 +7,8 @@ on: description: "Version of Flower e.g. (1.6.0)." required: true type: string - base-image-version: - description: "Version of the Flower base image." + base-image-tag: + description: "The tag of the Flower base image." required: false type: string default: "py3.11-ubuntu22.04" @@ -27,9 +27,9 @@ jobs: file-dir: src/docker/server build-args: | FLWR_VERSION=${{ github.event.inputs.flwr-version }} - BASE_IMAGE_VERSION=${{ github.event.inputs.base-image-version }} + BASE_IMAGE_TAG=${{ github.event.inputs.base-image-tag }} tags: | - ${{ github.event.inputs.flwr-version }}-${{ github.event.inputs.base-image-version }} + ${{ github.event.inputs.flwr-version }}-${{ github.event.inputs.base-image-tag }} ${{ github.event.inputs.flwr-version }} latest secrets: diff --git a/doc/source/contributor-how-to-build-docker-images.rst b/doc/source/contributor-how-to-build-docker-images.rst new file mode 100644 index 000000000000..d85e48155de0 --- /dev/null +++ b/doc/source/contributor-how-to-build-docker-images.rst @@ -0,0 +1,135 @@ +How to build Docker Flower images locally +========================================= + +Flower provides pre-made docker images on `Docker Hub `_ +that include all necessary dependencies for running the server. You can also build your own custom +docker images from scratch with a different version of Python or Ubuntu if that is what you need. +In this guide, we will explain what images exist and how to build them locally. + +Before we can start, we need to meet a few prerequisites in our local development environment. + +#. Clone the flower repository. + + .. code-block:: bash + + $ git clone https://github.com/adap/flower.git && cd flower + +#. Verify the Docker daemon is running. 
+ + Please follow the first section on + `Run Flower using Docker `_ + which covers this step in more detail. + +Currently, Flower provides two images, a base image and a server image. There will also be a client +image soon. The base image, as the name suggests, contains basic dependencies that both the server +and the client need. This includes system dependencies, Python and Python tools. The server image is +based on the base image, but it additionally installs the Flower server using ``pip``. + +The build instructions that assemble the images are located in the respective Dockerfiles. You +can find them in the subdirectories of ``src/docker``. + +Both the base and server images are configured via build arguments. Through build arguments, we can make +our build more flexible. For example, in the base image, we can specify the version of Python to +install using the ``PYTHON_VERSION`` build argument. Some of the build arguments have default +values, others must be specified when building the image. All available build arguments for each +image are listed in one of the tables below. + +Building the base image +----------------------- + +.. list-table:: + :widths: 25 45 15 15 + :header-rows: 1 + + * - Build argument + - Description + - Required + - Example + * - ``PYTHON_VERSION`` + - Version of ``python`` to be installed. + - Yes + - ``3.11`` + * - ``PIP_VERSION`` + - Version of ``pip`` to be installed. + - Yes + - ``23.0.1`` + * - ``SETUPTOOLS_VERSION`` + - Version of ``setuptools`` to be installed. + - Yes + - ``69.0.2`` + * - ``UBUNTU_VERSION`` + - Version of the official Ubuntu Docker image. + - Defaults to ``22.04``. + - + +The following example creates a base image with Python 3.11.0, pip 23.0.1 and setuptools 69.0.2: + +.. code-block:: bash + + $ cd src/docker/base/ + $ docker build \ + --build-arg PYTHON_VERSION=3.11.0 \ + --build-arg PIP_VERSION=23.0.1 \ + --build-arg SETUPTOOLS_VERSION=69.0.2 \ + -t flwr_base:0.1.0 . 
+ +The name of the image is ``flwr_base`` and the tag ``0.1.0``. Remember that the build arguments as well +as the name and tag can be adapted to your needs. These values serve as examples only. + +Building the server image +------------------------- + +.. list-table:: + :widths: 25 45 15 15 + :header-rows: 1 + + * - Build argument + - Description + - Required + - Example + * - ``BASE_REPOSITORY`` + - The repository name of the base image. + - Defaults to ``flwr/base``. + - + * - ``BASE_IMAGE_TAG`` + - The image tag of the base image. + - Defaults to ``py3.11-ubuntu22.04``. + - + * - ``FLWR_VERSION`` + - Version of Flower to be installed. + - Yes + - ``1.6.0`` + +The following example creates a server image with the official Flower base image py3.11-ubuntu22.04 +and Flower 1.6.0: + +.. code-block:: bash + + $ cd src/docker/server/ + $ docker build \ + --build-arg BASE_IMAGE_TAG=py3.11-ubuntu22.04 \ + --build-arg FLWR_VERSION=1.6.0 \ + -t flwr_server:0.1.0 . + +The name of the image is ``flwr_server`` and the tag ``0.1.0``. Remember that the build arguments as well +as the name and tag can be adapted to your needs. These values serve as examples only. + +If you want to use your own base image instead of the official Flower base image, all you need to do +is set the ``BASE_REPOSITORY`` and ``BASE_IMAGE_TAG`` build arguments. The value of +``BASE_REPOSITORY`` must match the name of your image and the value of ``BASE_IMAGE_TAG`` must match +the tag of your image. + +.. code-block:: bash + + $ cd src/docker/server/ + $ docker build \ + --build-arg BASE_REPOSITORY=flwr_base \ + --build-arg BASE_IMAGE_TAG=0.1.0 \ + --build-arg FLWR_VERSION=1.6.0 \ + -t flwr_server:0.1.0 . + +After creating the image, we can test whether the image is working: + +.. 
code-block:: bash + + $ docker run --rm flwr_server:0.1.0 --help diff --git a/doc/source/index.rst b/doc/source/index.rst index f7a4ec3daeda..c4e91b100cc0 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -167,6 +167,7 @@ The Flower community welcomes contributions. The following docs are intended to contributor-how-to-write-documentation contributor-how-to-release-flower contributor-how-to-contribute-translations + contributor-how-to-build-docker-images .. toctree:: :maxdepth: 1 diff --git a/src/docker/server/Dockerfile b/src/docker/server/Dockerfile index 9bf3214bb42c..c42246b16104 100644 --- a/src/docker/server/Dockerfile +++ b/src/docker/server/Dockerfile @@ -1,7 +1,8 @@ # Copyright 2023 Flower Labs GmbH. All Rights Reserved. -ARG BASE_IMAGE_VERSION=py3.11-ubuntu22.04 -FROM flwr/base:$BASE_IMAGE_VERSION as server +ARG BASE_REPOSITORY=flwr/base +ARG BASE_IMAGE_TAG=py3.11-ubuntu22.04 +FROM $BASE_REPOSITORY:$BASE_IMAGE_TAG as server WORKDIR /app ARG FLWR_VERSION From 3091630d502d823c97ebb5df5a1b11960e416d4d Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Wed, 20 Dec 2023 17:20:43 +0100 Subject: [PATCH 08/12] Strip pycharm metadata in the format.sh script (#2734) --- dev/format.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/format.sh b/dev/format.sh index a5bc32915545..6b9cdaf5f44c 100755 --- a/dev/format.sh +++ b/dev/format.sh @@ -18,7 +18,7 @@ python -m docformatter -i -r examples # Notebooks python -m black --ipynb -q doc/source/*.ipynb -KEYS="metadata.celltoolbar metadata.language_info metadata.toc metadata.notify_time metadata.varInspector metadata.accelerator metadata.vscode cell.metadata.id cell.metadata.heading_collapsed cell.metadata.hidden cell.metadata.code_folding cell.metadata.tags cell.metadata.init_cell cell.metadata.vscode" +KEYS="metadata.celltoolbar metadata.language_info metadata.toc metadata.notify_time metadata.varInspector metadata.accelerator 
metadata.vscode cell.metadata.id cell.metadata.heading_collapsed cell.metadata.hidden cell.metadata.code_folding cell.metadata.tags cell.metadata.init_cell cell.metadata.vscode cell.metadata.pycharm" python -m nbstripout doc/source/*.ipynb --extra-keys "$KEYS" python -m nbstripout examples/*/*.ipynb --extra-keys "$KEYS" From 789d511daa1729dfb68d77ba10fda1de03905a88 Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Wed, 20 Dec 2023 17:30:31 +0100 Subject: [PATCH 09/12] Docker server image docs (#2695) --- doc/source/how-to-install-flower.rst | 7 +- doc/source/how-to-run-flower-using-docker.rst | 144 ++++++++++++++++++ doc/source/index.rst | 1 + 3 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 doc/source/how-to-run-flower-using-docker.rst diff --git a/doc/source/how-to-install-flower.rst b/doc/source/how-to-install-flower.rst index b2efde176fc9..1107f6798b23 100644 --- a/doc/source/how-to-install-flower.rst +++ b/doc/source/how-to-install-flower.rst @@ -23,7 +23,7 @@ For simulations that use the Virtual Client Engine, ``flwr`` should be installed Verify installation ------------------- -The following command can be used to verfiy if Flower was successfully installed. If everything worked, it should print the version of Flower to the command line:: +The following command can be used to verify if Flower was successfully installed. 
If everything worked, it should print the version of Flower to the command line:: python -c "import flwr;print(flwr.__version__)" 1.5.0 @@ -32,6 +32,11 @@ The following command can be used to verfiy if Flower was successfully installed Advanced installation options ----------------------------- +Install via Docker +~~~~~~~~~~~~~~~~~~ + +`How to run Flower using Docker `_ + Install pre-release ~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/how-to-run-flower-using-docker.rst b/doc/source/how-to-run-flower-using-docker.rst new file mode 100644 index 000000000000..27ff61c280cb --- /dev/null +++ b/doc/source/how-to-run-flower-using-docker.rst @@ -0,0 +1,144 @@ +Run Flower using Docker +======================= + +The simplest way to get started with Flower is by using the pre-made Docker images, which you can +find on `Docker Hub `_. + +Before you start, make sure that the Docker daemon is running: + +.. code-block:: bash + + $ docker -v + Docker version 24.0.7, build afdd53b + +If you do not see the version of Docker but instead get an error saying that the command +was not found, you will need to install Docker first. You can find installation instructions +`here `_. + +.. note:: + + On Linux, Docker commands require ``sudo`` privilege. If you want to avoid using ``sudo``, + you can follow the `Post-installation steps `_ + on the official Docker website. + +Flower server +------------- + +Quickstart +~~~~~~~~~~ + +If you're looking to try out Flower, you can use the following command: + +.. code-block:: bash + + $ docker run --rm -p 9091:9091 -p 9092:9092 flwr/server:1.6.0-py3.11-ubuntu22.04 \ + --insecure + +The command will pull the Docker image with the tag ``1.6.0-py3.11-ubuntu22.04`` from Docker Hub. +The tag indicates which versions of Flower, Python and Ubuntu are used. In this case, it +uses Flower 1.6.0, Python 3.11 and Ubuntu 22.04. The ``--rm`` flag tells Docker to remove +the container after it exits. + +.. 
note:: + + By default, the Flower server keeps state in-memory. When using the Docker flag + ``--rm``, the state is not persisted between container starts. We will show below how to save the + state in a file on your host system. + +The ``-p <host>:<container>`` flag tells Docker to map the ports ``9091``/``9092`` of the host to +``9091``/``9092`` of the container, allowing you to access the Driver API on ``http://localhost:9091`` +and the Fleet API on ``http://localhost:9092``. Lastly, any flag that comes after the tag is passed +to the Flower server. Here, we are passing the flag ``--insecure``. + +.. attention:: + + The ``--insecure`` flag enables insecure communication (using HTTP, not HTTPS) and should only be used + for testing purposes. We strongly recommend enabling + `SSL `_ + when deploying to a production environment. + +You can use ``--help`` to view all available flags that the server supports: + +.. code-block:: bash + + $ docker run --rm flwr/server:1.6.0-py3.11-ubuntu22.04 --help + +Mounting a volume to store the state on the host system +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you want to persist the state of the server on your host system, all you need to do is specify a +path where you want to save the file on your host system and a name for the database file. In the +example below, we tell Docker via the flag ``-v`` to mount the user's home directory +(``~/`` on your host) into the ``/app/`` directory of the container. Furthermore, we use the +flag ``--database`` to specify the name of the database file. + +.. code-block:: bash + + $ docker run --rm \ + -p 9091:9091 -p 9092:9092 -v ~/:/app/ flwr/server:1.6.0-py3.11-ubuntu22.04 \ + --insecure \ + --database state.db + +As soon as the server starts, the file ``state.db`` is created in the user's home directory on +your host system. If the file already exists, the server tries to restore the state from the file. +To start the server with an empty database, simply remove the ``state.db`` file. 
+ +Enabling SSL for secure connections +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To enable SSL, you will need a CA certificate, a server certificate and a server private key. + +.. note:: + For testing purposes, you can generate your own self-signed certificates. The + `Enable SSL connections `_ + page contains a section that will guide you through the process. + +Assuming all files we need are in the local ``certificates`` directory, we can use the flag +``-v`` to mount the local directory into the ``/app/`` directory of the container. This allows the +server to access the files within the container. Finally, we pass the names of the certificates to +the server with the ``--certificates`` flag. + +.. code-block:: bash + + $ docker run --rm \ + -p 9091:9091 -p 9092:9092 -v ./certificates/:/app/ flwr/server:1.6.0-py3.11-ubuntu22.04 \ + --certificates ca.crt server.pem server.key + +Using a different Flower or Python version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you want to use a different version of Flower or Python, you can do so by changing the tag. +All versions we provide are available on `Docker Hub `_. + +Pinning a Docker image to a specific version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It may happen that we update the images behind the tags. Such updates usually include security +updates of system dependencies that should not change the functionality of Flower. However, if you +want to ensure that you always use the same image, you can specify the hash of the image instead of +the tag. + +The following command returns the current image hash referenced by the ``server:1.6.0-py3.11-ubuntu22.04`` tag: + +.. code-block:: bash + + $ docker inspect --format='{{index .RepoDigests 0}}' flwr/server:1.6.0-py3.11-ubuntu22.04 + flwr/server@sha256:43fc389bcb016feab2b751b2ccafc9e9a906bb0885bd92b972329801086bc017 + +Next, we can pin the hash when running a new server container: + +.. 
code-block:: bash + + $ docker run \ + --rm flwr/server@sha256:43fc389bcb016feab2b751b2ccafc9e9a906bb0885bd92b972329801086bc017 \ + --insecure + +Setting environment variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To set a variable inside a Docker container, you can use the ``-e <name>=<value>`` flag. + +.. code-block:: bash + + $ docker run -e FLWR_TELEMETRY_ENABLED=0 \ + --rm flwr/server:1.6.0-py3.11-ubuntu22.04 --insecure diff --git a/doc/source/index.rst b/doc/source/index.rst index c4e91b100cc0..5df591d6ce05 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -92,6 +92,7 @@ Problem-oriented how-to guides show step-by-step how to achieve a specific goal. how-to-enable-ssl-connections how-to-upgrade-to-flower-1.0 how-to-use-built-in-middleware-layers + how-to-run-flower-using-docker .. toctree:: :maxdepth: 1 From 089cd30d8f002bd19170a39ab0bab5ebb4708369 Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Wed, 20 Dec 2023 17:59:40 +0100 Subject: [PATCH 10/12] Migrate PyTorch Lightning to use Flower Datasets (#2456) Co-authored-by: jafermarq Co-authored-by: Taner Topal --- .../quickstart-pytorch-lightning/README.md | 77 ++++++++++++++++++- .../quickstart-pytorch-lightning/client.py | 25 +++++- .../quickstart-pytorch-lightning/mnist.py | 56 ++++++++++---- .../pyproject.toml | 1 + .../requirements.txt | 1 + examples/quickstart-pytorch-lightning/run.sh | 4 +- .../quickstart-pytorch-lightning/server.py | 2 +- 7 files changed, 143 insertions(+), 23 deletions(-) diff --git a/examples/quickstart-pytorch-lightning/README.md b/examples/quickstart-pytorch-lightning/README.md index 360efb8f6261..1287b50bca65 100644 --- a/examples/quickstart-pytorch-lightning/README.md +++ b/examples/quickstart-pytorch-lightning/README.md @@ -1 +1,76 @@ -# Flower Examples using PyTorch Lightning +# Flower Example using PyTorch Lightning + +This introductory example to Flower uses PyTorch Lightning, but deep knowledge of PyTorch Lightning is not necessarily 
required to run the example. However, it will help you understand how to adapt Flower to your use case. Running this example in itself is quite easy. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to download, partition and preprocess the MNIST dataset. + +## Project Setup + +Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you: + +```shell +git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/quickstart-pytorch-lightning . && rm -rf flower && cd quickstart-pytorch-lightning +``` + +This will create a new directory called `quickstart-pytorch-lightning` containing the following files: + +```shell +-- pyproject.toml +-- requirements.txt +-- client.py # client-side code +-- server.py # server-side code (including the strategy) +-- README.md +-- run.sh # runs server, then two clients +-- mnist.py # run a centralised version of this example +``` + +### Installing Dependencies + +Project dependencies (such as `torch` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences. + +#### Poetry + +```shell +poetry install +poetry shell +``` + +Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command: + +```shell +poetry run python -c "import flwr" +``` + +If you don't see any errors you're good to go! + +#### pip + +Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt. 
+ +```shell +pip install -r requirements.txt +``` + +## Run Federated Learning with PyTorch and Flower + +Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows: + +```shell +python server.py +``` + +Now you are ready to start the Flower clients which will participate in the learning. We need to specify the node id to +use different partitions of the data on different nodes. To do so simply open two more terminal windows and run the +following commands. + +Start client 1 in the first terminal: + +```shell +python client.py --node-id 0 +``` + +Start client 2 in the second terminal: + +```shell +python client.py --node-id 1 +``` + +You will see that PyTorch Lightning is starting a federated training. Look at the [code](https://github.com/adap/flower/tree/main/examples/quickstart-pytorch-lightning) for a detailed explanation. diff --git a/examples/quickstart-pytorch-lightning/client.py b/examples/quickstart-pytorch-lightning/client.py index e810d639974d..8e07494b6492 100644 --- a/examples/quickstart-pytorch-lightning/client.py +++ b/examples/quickstart-pytorch-lightning/client.py @@ -1,9 +1,14 @@ -import flwr as fl -import mnist -import pytorch_lightning as pl +import argparse from collections import OrderedDict + +import pytorch_lightning as pl import torch +from datasets.utils.logging import disable_progress_bar +import flwr as fl +import mnist + +disable_progress_bar() class FlowerClient(fl.client.NumPyClient): def __init__(self, model, train_loader, val_loader, test_loader): @@ -50,9 +55,21 @@ def _set_parameters(model, parameters): def main() -> None: + + parser = argparse.ArgumentParser(description="Flower") + parser.add_argument( + "--node-id", + type=int, + choices=range(0, 10), + required=True, + help="Specifies the artificial data partition", + ) + args = parser.parse_args() + node_id = args.node_id + # Model and data model = mnist.LitAutoEncoder() - train_loader, val_loader, test_loader = 
mnist.load_data() + train_loader, val_loader, test_loader = mnist.load_data(node_id) # Flower client client = FlowerClient(model, train_loader, val_loader, test_loader) diff --git a/examples/quickstart-pytorch-lightning/mnist.py b/examples/quickstart-pytorch-lightning/mnist.py index c8f8374ecc04..d32a0afe2d1e 100644 --- a/examples/quickstart-pytorch-lightning/mnist.py +++ b/examples/quickstart-pytorch-lightning/mnist.py @@ -3,14 +3,13 @@ Source: pytorchlightning.ai (2021/02/04) """ - +from flwr_datasets import FederatedDataset +import pytorch_lightning as pl import torch from torch import nn from torch.nn import functional as F -from torch.utils.data import DataLoader, random_split +from torch.utils.data import DataLoader from torchvision import transforms -from torchvision.datasets import MNIST -import pytorch_lightning as pl class LitAutoEncoder(pl.LightningModule): @@ -60,25 +59,52 @@ def _evaluate(self, batch, stage=None): self.log(f"{stage}_loss", loss, prog_bar=True) -def load_data(): - # Training / validation set - trainset = MNIST("", train=True, download=True, transform=transforms.ToTensor()) - mnist_train, mnist_val = random_split(trainset, [55000, 5000]) - train_loader = DataLoader(mnist_train, batch_size=32, shuffle=True, num_workers=16) - val_loader = DataLoader(mnist_val, batch_size=32, shuffle=False, num_workers=16) +def collate_fn(batch): + """Change the dictionary to tuple to keep the exact dataloader behavior.""" + images = [item["image"] for item in batch] + labels = [item["label"] for item in batch] + + images_tensor = torch.stack(images) + labels_tensor = torch.tensor(labels) + + return images_tensor, labels_tensor + + +def apply_transforms(batch): + """Apply transforms to the partition from FederatedDataset.""" + batch["image"] = [transforms.functional.to_tensor(img) for img in batch["image"]] + return batch + - # Test set - testset = MNIST("", train=False, download=True, transform=transforms.ToTensor()) - test_loader = DataLoader(testset, 
batch_size=32, shuffle=False, num_workers=16) +def load_data(partition): + fds = FederatedDataset(dataset="mnist", partitioners={"train": 10}) + partition = fds.load_partition(partition, "train") - return train_loader, val_loader, test_loader + partition = partition.with_transform(apply_transforms) + # 20 % for the federated evaluation + partition_full = partition.train_test_split(test_size=0.2) + # 60 % for the federated train and 20 % for the federated validation (both in fit) + partition_train_valid = partition_full["train"].train_test_split(train_size=0.75) + trainloader = DataLoader( + partition_train_valid["train"], batch_size=32, + shuffle=True, collate_fn=collate_fn, num_workers=1 + ) + valloader = DataLoader( + partition_train_valid["test"], batch_size=32, + collate_fn=collate_fn, num_workers=1 + ) + testloader = DataLoader( + partition_full["test"], batch_size=32, + collate_fn=collate_fn, num_workers=1 + ) + return trainloader, valloader, testloader def main() -> None: """Centralized training.""" # Load data - train_loader, val_loader, test_loader = load_data() + train_loader, val_loader, test_loader = load_data(0) # Load model model = LitAutoEncoder() diff --git a/examples/quickstart-pytorch-lightning/pyproject.toml b/examples/quickstart-pytorch-lightning/pyproject.toml index 0a1e1376b8cb..853ef9c1646f 100644 --- a/examples/quickstart-pytorch-lightning/pyproject.toml +++ b/examples/quickstart-pytorch-lightning/pyproject.toml @@ -12,5 +12,6 @@ authors = ["The Flower Authors "] python = "^3.8" flwr = ">=1.0,<2.0" # flwr = { path = "../../", develop = true } # Development +flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" } pytorch-lightning = "1.6.0" torchvision = "0.14.1" diff --git a/examples/quickstart-pytorch-lightning/requirements.txt b/examples/quickstart-pytorch-lightning/requirements.txt index 1cd0b31fa0b5..6530dcc8c52c 100644 --- a/examples/quickstart-pytorch-lightning/requirements.txt +++ 
b/examples/quickstart-pytorch-lightning/requirements.txt @@ -1,3 +1,4 @@ flwr>=1.0, <2.0 +flwr-datasets[vision]>=0.0.2, <1.0.0 pytorch_lightning>=1.4.7 torchvision==0.14.1 diff --git a/examples/quickstart-pytorch-lightning/run.sh b/examples/quickstart-pytorch-lightning/run.sh index 2b6507bc154c..60893a9a055b 100755 --- a/examples/quickstart-pytorch-lightning/run.sh +++ b/examples/quickstart-pytorch-lightning/run.sh @@ -4,9 +4,9 @@ echo "Starting server" python server.py & sleep 3 # Sleep for 3s to give the server enough time to start -for i in `seq 0 1`; do +for i in $(seq 0 1); do echo "Starting client $i" - python client.py & + python client.py --node-id "${i}" & done # This will allow you to use CTRL+C to stop all background processes diff --git a/examples/quickstart-pytorch-lightning/server.py b/examples/quickstart-pytorch-lightning/server.py index 370186ae1d98..a104a1fffd26 100644 --- a/examples/quickstart-pytorch-lightning/server.py +++ b/examples/quickstart-pytorch-lightning/server.py @@ -11,7 +11,7 @@ def main() -> None: # Start Flower server for three rounds of federated learning fl.server.start_server( server_address="0.0.0.0:8080", - config=fl.server.ServerConfig(num_rounds=10), + config=fl.server.ServerConfig(num_rounds=3), strategy=strategy, ) From a94fc65b54ec8d688275aaf2e297ecd96344731d Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Wed, 20 Dec 2023 18:48:11 +0100 Subject: [PATCH 11/12] Migrate pytorch-from-centralized-to-federated to use Flower Datasets (#2457) Co-authored-by: jafermarq --- .../README.md | 2 +- .../__init__.py | 0 .../cifar.py | 60 ++++++++++--------- .../client.py | 48 +++++++-------- .../pyproject.toml | 1 + .../requirements.txt | 1 + .../run.sh | 4 +- .../server.py | 28 +++++++-- 8 files changed, 84 insertions(+), 60 deletions(-) delete mode 100644 examples/pytorch-from-centralized-to-federated/__init__.py diff --git a/examples/pytorch-from-centralized-to-federated/README.md 
b/examples/pytorch-from-centralized-to-federated/README.md index 40f7f40e5adc..fccb14158ecd 100644 --- a/examples/pytorch-from-centralized-to-federated/README.md +++ b/examples/pytorch-from-centralized-to-federated/README.md @@ -2,7 +2,7 @@ This example demonstrates how an already existing centralized PyTorch-based machine learning project can be federated with Flower. -This introductory example for Flower uses PyTorch, but you're not required to be a PyTorch expert to run the example. The example will help you to understand how Flower can be used to build federated learning use cases based on existing machine learning projects. +This introductory example for Flower uses PyTorch, but you're not required to be a PyTorch expert to run the example. The example will help you to understand how Flower can be used to build federated learning use cases based on existing machine learning projects. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to download, partition and preprocess the CIFAR-10 dataset. 
## Project Setup diff --git a/examples/pytorch-from-centralized-to-federated/__init__.py b/examples/pytorch-from-centralized-to-federated/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/examples/pytorch-from-centralized-to-federated/cifar.py b/examples/pytorch-from-centralized-to-federated/cifar.py index 3c1d67d2f445..a374909c33b2 100644 --- a/examples/pytorch-from-centralized-to-federated/cifar.py +++ b/examples/pytorch-from-centralized-to-federated/cifar.py @@ -6,22 +6,20 @@ https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html """ - # mypy: ignore-errors # pylint: disable=W0223 -from typing import Tuple, Dict +from typing import Tuple import torch import torch.nn as nn import torch.nn.functional as F -import torchvision -import torchvision.transforms as transforms from torch import Tensor -from torchvision.datasets import CIFAR10 +from torch.utils.data import DataLoader +from torchvision.transforms import Compose, ToTensor, Normalize -DATA_ROOT = "./dataset" +from flwr_datasets import FederatedDataset # pylint: disable=unsubscriptable-object @@ -53,26 +51,32 @@ def forward(self, x: Tensor) -> Tensor: return x -def load_data() -> ( - Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader, Dict] -): - """Load CIFAR-10 (training and test set).""" - transform = transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] +def load_data(node_id: int): + """Load partition CIFAR10 data.""" + fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10}) + partition = fds.load_partition(node_id) + # Divide data on each node: 80% train, 20% test + partition_train_test = partition.train_test_split(test_size=0.2) + pytorch_transforms = Compose( + [ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] ) - trainset = CIFAR10(DATA_ROOT, train=True, download=True, transform=transform) - trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True) - 
testset = CIFAR10(DATA_ROOT, train=False, download=True, transform=transform) - testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False) - num_examples = {"trainset": len(trainset), "testset": len(testset)} - return trainloader, testloader, num_examples + + def apply_transforms(batch): + """Apply transforms to the partition from FederatedDataset.""" + batch["img"] = [pytorch_transforms(img) for img in batch["img"]] + return batch + + partition_train_test = partition_train_test.with_transform(apply_transforms) + trainloader = DataLoader(partition_train_test["train"], batch_size=32, shuffle=True) + testloader = DataLoader(partition_train_test["test"], batch_size=32) + return trainloader, testloader def train( - net: Net, - trainloader: torch.utils.data.DataLoader, - epochs: int, - device: torch.device, # pylint: disable=no-member + net: Net, + trainloader: torch.utils.data.DataLoader, + epochs: int, + device: torch.device, # pylint: disable=no-member ) -> None: """Train the network.""" # Define loss and optimizer @@ -87,7 +91,7 @@ def train( for epoch in range(epochs): # loop over the dataset multiple times running_loss = 0.0 for i, data in enumerate(trainloader, 0): - images, labels = data[0].to(device), data[1].to(device) + images, labels = data["img"].to(device), data["label"].to(device) # zero the parameter gradients optimizer.zero_grad() @@ -106,9 +110,9 @@ def train( def test( - net: Net, - testloader: torch.utils.data.DataLoader, - device: torch.device, # pylint: disable=no-member + net: Net, + testloader: torch.utils.data.DataLoader, + device: torch.device, # pylint: disable=no-member ) -> Tuple[float, float]: """Validate the network on the entire test set.""" # Define loss and metrics @@ -120,7 +124,7 @@ def test( net.eval() with torch.no_grad(): for data in testloader: - images, labels = data[0].to(device), data[1].to(device) + images, labels = data["img"].to(device), data["label"].to(device) outputs = net(images) loss += 
criterion(outputs, labels).item() _, predicted = torch.max(outputs.data, 1) # pylint: disable=no-member @@ -133,7 +137,7 @@ def main(): DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print("Centralized PyTorch training") print("Load data") - trainloader, testloader, _ = load_data() + trainloader, testloader = load_data(0) net = Net().to(DEVICE) net.eval() print("Start training") diff --git a/examples/pytorch-from-centralized-to-federated/client.py b/examples/pytorch-from-centralized-to-federated/client.py index 88678e0569b7..df4da7c11cff 100644 --- a/examples/pytorch-from-centralized-to-federated/client.py +++ b/examples/pytorch-from-centralized-to-federated/client.py @@ -1,24 +1,22 @@ """Flower client example using PyTorch for CIFAR-10 image classification.""" - - -import os -import sys -import timeit +import argparse from collections import OrderedDict from typing import Dict, List, Tuple -import flwr as fl import numpy as np import torch -import torchvision +from datasets.utils.logging import disable_progress_bar +from torch.utils.data import DataLoader import cifar +import flwr as fl + +disable_progress_bar() + USE_FEDBN: bool = True -# pylint: disable=no-member -DEVICE: str = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") -# pylint: enable=no-member +DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Flower Client @@ -26,21 +24,20 @@ class CifarClient(fl.client.NumPyClient): """Flower client implementing CIFAR-10 image classification using PyTorch.""" def __init__( - self, - model: cifar.Net, - trainloader: torch.utils.data.DataLoader, - testloader: torch.utils.data.DataLoader, - num_examples: Dict, + self, + model: cifar.Net, + trainloader: DataLoader, + testloader: DataLoader, ) -> None: self.model = model self.trainloader = trainloader self.testloader = testloader - self.num_examples = num_examples def get_parameters(self, config: Dict[str, str]) -> List[np.ndarray]: self.model.train() if 
USE_FEDBN: - # Return model parameters as a list of NumPy ndarrays, excluding parameters of BN layers when using FedBN + # Return model parameters as a list of NumPy ndarrays, excluding + # parameters of BN layers when using FedBN return [ val.cpu().numpy() for name, val in self.model.state_dict().items() @@ -64,36 +61,39 @@ def set_parameters(self, parameters: List[np.ndarray]) -> None: self.model.load_state_dict(state_dict, strict=True) def fit( - self, parameters: List[np.ndarray], config: Dict[str, str] + self, parameters: List[np.ndarray], config: Dict[str, str] ) -> Tuple[List[np.ndarray], int, Dict]: # Set model parameters, train model, return updated model parameters self.set_parameters(parameters) cifar.train(self.model, self.trainloader, epochs=1, device=DEVICE) - return self.get_parameters(config={}), self.num_examples["trainset"], {} + return self.get_parameters(config={}), len(self.trainloader.dataset), {} def evaluate( - self, parameters: List[np.ndarray], config: Dict[str, str] + self, parameters: List[np.ndarray], config: Dict[str, str] ) -> Tuple[float, int, Dict]: # Set model parameters, evaluate model on local test dataset, return result self.set_parameters(parameters) loss, accuracy = cifar.test(self.model, self.testloader, device=DEVICE) - return float(loss), self.num_examples["testset"], {"accuracy": float(accuracy)} + return float(loss), len(self.testloader.dataset), {"accuracy": float(accuracy)} def main() -> None: """Load data, start CifarClient.""" + parser = argparse.ArgumentParser(description="Flower") + parser.add_argument("--node-id", type=int, required=True, choices=range(0, 10)) + args = parser.parse_args() # Load data - trainloader, testloader, num_examples = cifar.load_data() + trainloader, testloader = cifar.load_data(args.node_id) # Load model model = cifar.Net().to(DEVICE).train() # Perform a single forward pass to properly initialize BatchNorm - _ = model(next(iter(trainloader))[0].to(DEVICE)) + _ = 
model(next(iter(trainloader))["img"].to(DEVICE)) # Start client - client = CifarClient(model, trainloader, testloader, num_examples) + client = CifarClient(model, trainloader, testloader) fl.client.start_numpy_client(server_address="127.0.0.1:8080", client=client) diff --git a/examples/pytorch-from-centralized-to-federated/pyproject.toml b/examples/pytorch-from-centralized-to-federated/pyproject.toml index 73999a9e6cd4..6d6f138a0aea 100644 --- a/examples/pytorch-from-centralized-to-federated/pyproject.toml +++ b/examples/pytorch-from-centralized-to-federated/pyproject.toml @@ -11,5 +11,6 @@ authors = ["The Flower Authors "] [tool.poetry.dependencies] python = ">=3.8,<3.11" flwr = ">=1.0,<2.0" +flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" } torch = "1.13.1" torchvision = "0.14.1" diff --git a/examples/pytorch-from-centralized-to-federated/requirements.txt b/examples/pytorch-from-centralized-to-federated/requirements.txt index f3caddbc875e..ba4afad9c288 100644 --- a/examples/pytorch-from-centralized-to-federated/requirements.txt +++ b/examples/pytorch-from-centralized-to-federated/requirements.txt @@ -1,3 +1,4 @@ flwr>=1.0, <2.0 +flwr-datasets[vision]>=0.0.2, <1.0.0 torch==1.13.1 torchvision==0.14.1 diff --git a/examples/pytorch-from-centralized-to-federated/run.sh b/examples/pytorch-from-centralized-to-federated/run.sh index c64f362086aa..1ed51dd787ac 100755 --- a/examples/pytorch-from-centralized-to-federated/run.sh +++ b/examples/pytorch-from-centralized-to-federated/run.sh @@ -4,9 +4,9 @@ echo "Starting server" python server.py & sleep 3 # Sleep for 3s to give the server enough time to start -for i in `seq 0 1`; do +for i in $(seq 0 1); do echo "Starting client $i" - python client.py & + python client.py --node-id $i & done # This will allow you to use CTRL+C to stop all background processes diff --git a/examples/pytorch-from-centralized-to-federated/server.py b/examples/pytorch-from-centralized-to-federated/server.py index 
29cbce1884d1..42f34b3a78e9 100644 --- a/examples/pytorch-from-centralized-to-federated/server.py +++ b/examples/pytorch-from-centralized-to-federated/server.py @@ -1,10 +1,28 @@ """Flower server example.""" +from typing import List, Tuple + import flwr as fl +from flwr.common import Metrics + + +# Define metric aggregation function +def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: + # Multiply accuracy of each client by number of examples used + accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics] + examples = [num_examples for num_examples, _ in metrics] + + # Aggregate and return custom metric (weighted average) + return {"accuracy": sum(accuracies) / sum(examples)} + + +# Define strategy +strategy = fl.server.strategy.FedAvg(evaluate_metrics_aggregation_fn=weighted_average) -if __name__ == "__main__": - fl.server.start_server( - server_address="0.0.0.0:8080", - config=fl.server.ServerConfig(num_rounds=3), - ) +# Start Flower server +fl.server.start_server( + server_address="0.0.0.0:8080", + config=fl.server.ServerConfig(num_rounds=10), + strategy=strategy, +) From aded3e06c3190d5c8a736452a365672d351c613b Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Wed, 20 Dec 2023 19:34:07 +0100 Subject: [PATCH 12/12] Add PyTorch integration tests with FDS (#2351) --- datasets/e2e/pytorch/pyproject.toml | 16 ++++ datasets/e2e/pytorch/pytorch_test.py | 131 +++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 datasets/e2e/pytorch/pyproject.toml create mode 100644 datasets/e2e/pytorch/pytorch_test.py diff --git a/datasets/e2e/pytorch/pyproject.toml b/datasets/e2e/pytorch/pyproject.toml new file mode 100644 index 000000000000..4565cce9f828 --- /dev/null +++ b/datasets/e2e/pytorch/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["poetry-core>=1.4.0"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "fds-e2e-pytorch" +version 
= "0.1.0" +description = "Flower Datasets with PyTorch" +authors = ["The Flower Authors "] + +[tool.poetry.dependencies] +python = "^3.8" +flwr-datasets = { path = "./../../", extras = ["vision"] } +torch = "^1.12.0" +torchvision = "^0.14.1" +parameterized = "==0.9.0" diff --git a/datasets/e2e/pytorch/pytorch_test.py b/datasets/e2e/pytorch/pytorch_test.py new file mode 100644 index 000000000000..5bac8f770f23 --- /dev/null +++ b/datasets/e2e/pytorch/pytorch_test.py @@ -0,0 +1,131 @@ +import unittest + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from datasets.utils.logging import disable_progress_bar +from parameterized import parameterized_class, parameterized +from torch import Tensor +from torch.utils.data import DataLoader +from torchvision.transforms import Compose, ToTensor, Normalize + +from flwr_datasets import FederatedDataset + + +class SimpleCNN(nn.Module): + def __init__(self): + super(SimpleCNN, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x + + +# Using parameterized testing, two different sets of parameters are specified: +# 1. CIFAR10 dataset with the simple ToTensor transform. +# 2. CIFAR10 dataset with a composed transform that first converts an image to a tensor +# and then normalizes it. 
+@parameterized_class( + [ + {"dataset_name": "cifar10", "test_split": "test", "transforms": ToTensor()}, + {"dataset_name": "cifar10", "test_split": "test", "transforms": Compose( + [ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + )}, + ] +) +class FdsToPyTorch(unittest.TestCase): + """Test the conversion from FDS to PyTorch Dataset and Dataloader.""" + + dataset_name = "" + test_split = "" + transforms = None + trainloader = None + expected_img_shape_after_transform = [3, 32, 32] + + @classmethod + def setUpClass(cls): + """Disable progress bar to keep the log clean. + """ + disable_progress_bar() + + def _create_trainloader(self, batch_size: int) -> DataLoader: + """Create a trainloader from the federated dataset.""" + partition_id = 0 + fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 100}) + partition = fds.load_partition(partition_id, "train") + partition_train_test = partition.train_test_split(test_size=0.2) + partition_train_test = partition_train_test.map( + lambda img: {"img": self.transforms(img)}, input_columns="img" + ) + trainloader = DataLoader( + partition_train_test["train"].with_format("torch"), batch_size=batch_size, + shuffle=True + ) + return trainloader + + def test_create_partition_dataloader_with_transforms_shape(self) -> None: + """Test if the DataLoader returns batches with the expected shape.""" + batch_size = 16 + trainloader = self._create_trainloader(batch_size) + batch = next(iter(trainloader)) + images = batch["img"] + self.assertEqual(tuple(images.shape), + (batch_size, *self.expected_img_shape_after_transform)) + + def test_create_partition_dataloader_with_transforms_batch_type(self) -> None: + """Test if the DataLoader returns batches of type dictionary.""" + batch_size = 16 + trainloader = self._create_trainloader(batch_size) + batch = next(iter(trainloader)) + self.assertIsInstance(batch, dict) + + def test_create_partition_dataloader_with_transforms_data_type(self) -> None: + """Test to verify 
if the data in the DataLoader batches are of type Tensor.""" + batch_size = 16 + trainloader = self._create_trainloader(batch_size) + batch = next(iter(trainloader)) + images = batch["img"] + self.assertIsInstance(images, Tensor) + + @parameterized.expand([ + ("not_nan", torch.isnan), + ("not_inf", torch.isinf), + ]) + def test_train_model_loss_value(self, name, condition_func): + """Test if the model trains and if the loss is a correct number.""" + trainloader = self._create_trainloader(16) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + # Create the model, criterion, and optimizer + net = SimpleCNN().to(device) + criterion = nn.CrossEntropyLoss() + optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) + + # Training loop for one epoch + net.train() + loss = None + for i, data in enumerate(trainloader, 0): + inputs, labels = data['img'].to(device), data['label'].to(device) + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + self.assertFalse(condition_func(loss).item()) + + +if __name__ == '__main__': + unittest.main()