From dd5e317ac5070cd5a7f3118b3447e6dc96db808d Mon Sep 17 00:00:00 2001 From: William Lindskog <60471142+WilliamLindskog@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:07:37 -0400 Subject: [PATCH 01/11] docs(framework) Update Flower Architecture documentation (#4131) Co-authored-by: William --- doc/source/contributor-explanation-architecture.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/contributor-explanation-architecture.rst b/doc/source/contributor-explanation-architecture.rst index a20a84313118..48b43cf3f2b8 100644 --- a/doc/source/contributor-explanation-architecture.rst +++ b/doc/source/contributor-explanation-architecture.rst @@ -1,6 +1,8 @@ Flower Architecture =================== +This document provides an overview of the Flower architecture. The architecture is designed to be modular and flexible, and can use two different types of engines: Deployment Engine and Simulation Engine. + Edge Client Engine ------------------ From 486cac3c5e68f717ad023e87e01bbb87c115c37d Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Thu, 5 Sep 2024 11:39:28 +0200 Subject: [PATCH 02/11] ci(framework:skip) Update docker/build-push-action to 6.7.0 (#4135) Signed-off-by: Robert Steiner --- .github/workflows/_docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_docker-build.yml b/.github/workflows/_docker-build.yml index ac88502b748a..1f36de550659 100644 --- a/.github/workflows/_docker-build.yml +++ b/.github/workflows/_docker-build.yml @@ -104,7 +104,7 @@ jobs: uses: Wandalen/wretry.action@6feedb7dedadeb826de0f45ff482b53b379a7844 # v3.5.0 id: build with: - action: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 + action: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0 attempt_limit: 60 # 60 attempts * (9 secs delay + 1 sec retry) = ~10 mins attempt_delay: 9000 # 9 secs with: | From 43174b7201c6ea9571c3a73b0d86ebc055c9e2a6 Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Thu, 5 Sep 2024 13:27:52 +0200 Subject: [PATCH 03/11] ci(framework:skip) Replace QEMU with ARM runner (#4129) Signed-off-by: Robert Steiner --- .github/workflows/_docker-build.yml | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/_docker-build.yml b/.github/workflows/_docker-build.yml index 1f36de550659..227b0d7482ae 100644 --- a/.github/workflows/_docker-build.yml +++ b/.github/workflows/_docker-build.yml @@ -36,7 +36,7 @@ permissions: jobs: build: name: Build image - runs-on: ubuntu-22.04 + runs-on: ${{ matrix.platform.runner-os }} timeout-minutes: 180 outputs: build-id: ${{ steps.build-id.outputs.id }} @@ -44,10 +44,8 @@ jobs: fail-fast: true matrix: platform: [ - # build-push action and qemu use different platform names - # therefore we create a map - { name: "amd64", qemu: "", docker: "linux/amd64" }, - { name: "arm64", qemu: "arm64", docker: "linux/arm64" }, + { name: "amd64", docker: "linux/amd64", runner-os: "ubuntu-22.04" }, + { name: "arm64", docker: "linux/arm64", runner-os: "ubuntu-4-core-arm64" }, ] steps: - name: Create build id @@ -79,12 +77,6 @@ jobs: print(build_args, file=fh) print("EOF", file=fh) - - name: Set up QEMU - if: matrix.platform.qemu != '' - uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 - with: - platforms: ${{ matrix.platform.qemu }} - - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1 From 3457573d5ea9e9de3e2c5d826cc079d8d6a707fd Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Thu, 5 Sep 2024 12:44:11 +0100 Subject: [PATCH 04/11] refactor(examples) Update xgboost quickstart example (#4088) Co-authored-by: jafermarq Co-authored-by: Taner Topal --- examples/xgboost-quickstart/README.md | 82 +++---- examples/xgboost-quickstart/client.py | 207 ------------------ examples/xgboost-quickstart/pyproject.toml | 46 +++- examples/xgboost-quickstart/run.sh | 17 -- examples/xgboost-quickstart/server.py | 48 ---- .../xgboost_quickstart/__init__.py | 1 + .../xgboost_quickstart/client_app.py | 139 ++++++++++++ .../xgboost_quickstart/server_app.py | 54 +++++ .../xgboost_quickstart/task.py | 71 ++++++ 9 files changed, 337 insertions(+), 328 deletions(-) delete mode 100644 examples/xgboost-quickstart/client.py delete mode 100755 examples/xgboost-quickstart/run.sh delete mode 100644 examples/xgboost-quickstart/server.py create mode 100644 examples/xgboost-quickstart/xgboost_quickstart/__init__.py create mode 100644 examples/xgboost-quickstart/xgboost_quickstart/client_app.py create mode 100644 examples/xgboost-quickstart/xgboost_quickstart/server_app.py create mode 100644 examples/xgboost-quickstart/xgboost_quickstart/task.py diff --git a/examples/xgboost-quickstart/README.md b/examples/xgboost-quickstart/README.md index fa3e9d0dc6fb..a7b047c090f0 100644 --- a/examples/xgboost-quickstart/README.md +++ b/examples/xgboost-quickstart/README.md @@ -4,7 +4,7 @@ dataset: [HIGGS] framework: [xgboost] --- -# Flower Example using XGBoost +# Federated Learning with XGBoost and Flower (Quickstart Example) This example demonstrates how to perform EXtreme Gradient Boosting (XGBoost) within Flower using `xgboost` package. We use [HIGGS](https://archive.ics.uci.edu/dataset/280/higgs) dataset for this example to perform a binary classification task. @@ -12,72 +12,60 @@ Tree-based with bagging method is used for aggregation on the server. This project provides a minimal code example to enable you to get started quickly. For a more comprehensive code example, take a look at [xgboost-comprehensive](https://github.com/adap/flower/tree/main/examples/xgboost-comprehensive). -## Project Setup +## Set up the project -Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you: +### Clone the project -```shell -git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/xgboost-quickstart . && rm -rf flower && cd xgboost-quickstart -``` - -This will create a new directory called `xgboost-quickstart` containing the following files: - -``` --- README.md <- Your're reading this right now --- server.py <- Defines the server-side logic --- client.py <- Defines the client-side logic --- run.sh <- Commands to run experiments --- pyproject.toml <- Example dependencies -``` - -### Installing Dependencies - -Project dependencies (such as `xgboost` and `flwr`) are defined in `pyproject.toml`. You can install the dependencies by invoking `pip`: +Start by cloning the example project: ```shell -# From a new python environment, run: -pip install . +git clone --depth=1 https://github.com/adap/flower.git _tmp \ + && mv _tmp/examples/xgboost-quickstart . \ + && rm -rf _tmp \ + && cd xgboost-quickstart ``` -Then, to verify that everything works correctly you can run the following command: +This will create a new directory called `xgboost-quickstart` with the following structure: ```shell -python3 -c "import flwr" +xgboost-quickstart +├── xgboost_quickstart +│ ├── __init__.py +│ ├── client_app.py # Defines your ClientApp +│ ├── server_app.py # Defines your ServerApp +│ └── task.py # Defines your utilities and data loading +├── pyproject.toml # Project metadata like dependencies and configs +└── README.md ``` -If you don't see any errors you're good to go! +### Install dependencies and project -## Run Federated Learning with XGBoost and Flower +Install the dependencies defined in `pyproject.toml` as well as the `xgboost_quickstart` package. -Afterwards you are ready to start the Flower server as well as the clients. -You can simply start the server in a terminal as follows: - -```shell -python3 server.py +```bash +pip install -e . ``` -Now you are ready to start the Flower clients which will participate in the learning. -To do so simply open two more terminal windows and run the following commands. +## Run the project -Start client 1 in the first terminal: +You can run your Flower project in both _simulation_ and _deployment_ mode without making changes to the code. If you are starting with Flower, we recommend you using the _simulation_ mode as it requires fewer components to be launched manually. By default, `flwr run` will make use of the Simulation Engine. -```shell -python3 client.py --partition-id=0 +### Run with the Simulation Engine + +```bash +flwr run . ``` -Start client 2 in the second terminal: +You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: -```shell -python3 client.py --partition-id=1 +```bash +flwr run . --run-config "num-server-rounds=5 params.eta=0.05" ``` -You will see that XGBoost is starting a federated training. - -Alternatively, you can use `run.sh` to run the same experiment in a single terminal as follows: +> \[!TIP\] +> For a more detailed walk-through check our [quickstart XGBoost tutorial](https://flower.ai/docs/framework/tutorial-quickstart-xgboost.html) -```shell -poetry run ./run.sh -``` +### Run with the Deployment Engine -Look at the [code](https://github.com/adap/flower/tree/main/examples/xgboost-quickstart) -and [tutorial](https://flower.ai/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation. +> \[!NOTE\] +> An update to this example will show how to run this Flower application with the Deployment Engine and TLS certificates, or with Docker. diff --git a/examples/xgboost-quickstart/client.py b/examples/xgboost-quickstart/client.py deleted file mode 100644 index d505a7ede785..000000000000 --- a/examples/xgboost-quickstart/client.py +++ /dev/null @@ -1,207 +0,0 @@ -import argparse -import warnings -from logging import INFO -from typing import Union - -import flwr as fl -import xgboost as xgb -from datasets import Dataset, DatasetDict -from flwr.common import ( - Code, - EvaluateIns, - EvaluateRes, - FitIns, - FitRes, - GetParametersIns, - GetParametersRes, - Parameters, - Status, -) -from flwr.common.logger import log -from flwr_datasets import FederatedDataset -from flwr_datasets.partitioner import IidPartitioner - -warnings.filterwarnings("ignore", category=UserWarning) - -# Define arguments parser for the client/partition ID. -parser = argparse.ArgumentParser() -parser.add_argument( - "--partition-id", - default=0, - type=int, - help="Partition ID used for the current client.", -) -args = parser.parse_args() - - -# Define data partitioning related functions -def train_test_split(partition: Dataset, test_fraction: float, seed: int): - """Split the data into train and validation set given split rate.""" - train_test = partition.train_test_split(test_size=test_fraction, seed=seed) - partition_train = train_test["train"] - partition_test = train_test["test"] - - num_train = len(partition_train) - num_test = len(partition_test) - - return partition_train, partition_test, num_train, num_test - - -def transform_dataset_to_dmatrix(data: Union[Dataset, DatasetDict]) -> xgb.core.DMatrix: - """Transform dataset to DMatrix format for xgboost.""" - x = data["inputs"] - y = data["label"] - new_data = xgb.DMatrix(x, label=y) - return new_data - - -# Load (HIGGS) dataset and conduct partitioning -# We use a small subset (num_partitions=30) of the dataset for demonstration to speed up the data loading process. -partitioner = IidPartitioner(num_partitions=30) -fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner}) - -# Load the partition for this `partition_id` -log(INFO, "Loading partition...") -partition = fds.load_partition(partition_id=args.partition_id, split="train") -partition.set_format("numpy") - -# Train/test splitting -train_data, valid_data, num_train, num_val = train_test_split( - partition, test_fraction=0.2, seed=42 -) - -# Reformat data to DMatrix for xgboost -log(INFO, "Reformatting data...") -train_dmatrix = transform_dataset_to_dmatrix(train_data) -valid_dmatrix = transform_dataset_to_dmatrix(valid_data) - -# Hyper-parameters for xgboost training -num_local_round = 1 -params = { - "objective": "binary:logistic", - "eta": 0.1, # Learning rate - "max_depth": 8, - "eval_metric": "auc", - "nthread": 16, - "num_parallel_tree": 1, - "subsample": 1, - "tree_method": "hist", -} - - -# Define Flower client -class XgbClient(fl.client.Client): - def __init__( - self, - train_dmatrix, - valid_dmatrix, - num_train, - num_val, - num_local_round, - params, - ): - self.train_dmatrix = train_dmatrix - self.valid_dmatrix = valid_dmatrix - self.num_train = num_train - self.num_val = num_val - self.num_local_round = num_local_round - self.params = params - - def get_parameters(self, ins: GetParametersIns) -> GetParametersRes: - _ = (self, ins) - return GetParametersRes( - status=Status( - code=Code.OK, - message="OK", - ), - parameters=Parameters(tensor_type="", tensors=[]), - ) - - def _local_boost(self, bst_input): - # Update trees based on local training data. - for i in range(self.num_local_round): - bst_input.update(self.train_dmatrix, bst_input.num_boosted_rounds()) - - # Bagging: extract the last N=num_local_round trees for sever aggregation - bst = bst_input[ - bst_input.num_boosted_rounds() - - self.num_local_round : bst_input.num_boosted_rounds() - ] - - return bst - - def fit(self, ins: FitIns) -> FitRes: - global_round = int(ins.config["global_round"]) - if global_round == 1: - # First round local training - bst = xgb.train( - self.params, - self.train_dmatrix, - num_boost_round=self.num_local_round, - evals=[(self.valid_dmatrix, "validate"), (self.train_dmatrix, "train")], - ) - else: - bst = xgb.Booster(params=self.params) - for item in ins.parameters.tensors: - global_model = bytearray(item) - - # Load global model into booster - bst.load_model(global_model) - - # Local training - bst = self._local_boost(bst) - - # Save model - local_model = bst.save_raw("json") - local_model_bytes = bytes(local_model) - - return FitRes( - status=Status( - code=Code.OK, - message="OK", - ), - parameters=Parameters(tensor_type="", tensors=[local_model_bytes]), - num_examples=self.num_train, - metrics={}, - ) - - def evaluate(self, ins: EvaluateIns) -> EvaluateRes: - # Load global model - bst = xgb.Booster(params=self.params) - for para in ins.parameters.tensors: - para_b = bytearray(para) - bst.load_model(para_b) - - # Run evaluation - eval_results = bst.eval_set( - evals=[(self.valid_dmatrix, "valid")], - iteration=bst.num_boosted_rounds() - 1, - ) - auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) - - global_round = ins.config["global_round"] - log(INFO, f"AUC = {auc} at round {global_round}") - - return EvaluateRes( - status=Status( - code=Code.OK, - message="OK", - ), - loss=0.0, - num_examples=self.num_val, - metrics={"AUC": auc}, - ) - - -# Start Flower client -fl.client.start_client( - server_address="127.0.0.1:8080", - client=XgbClient( - train_dmatrix, - valid_dmatrix, - num_train, - num_val, - num_local_round, - params, - ).to_client(), -) diff --git a/examples/xgboost-quickstart/pyproject.toml b/examples/xgboost-quickstart/pyproject.toml index f1e451fe779a..da3561bfded4 100644 --- a/examples/xgboost-quickstart/pyproject.toml +++ b/examples/xgboost-quickstart/pyproject.toml @@ -3,17 +3,45 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "quickstart-xgboost" -version = "0.1.0" -description = "XGBoost Federated Learning Quickstart with Flower" -authors = [ - { name = "The Flower Authors", email = "hello@flower.ai" }, -] +name = "xgboost_quickstart" +version = "1.0.0" +description = "Federated Learning with XGBoost and Flower (Quickstart Example)" +license = "Apache-2.0" dependencies = [ - "flwr>=1.8.0,<2.0", - "flwr-datasets>=0.1.0,<1.0.0", - "xgboost>=2.0.0,<3.0.0", + "flwr-nightly[simulation]==1.11.0.dev20240826", + "flwr-datasets>=0.3.0", + "xgboost>=2.0.0", ] [tool.hatch.build.targets.wheel] packages = ["."] + +[tool.flwr.app] +publisher = "flwrlabs" + +[tool.flwr.app.components] +serverapp = "xgboost_quickstart.server_app:app" +clientapp = "xgboost_quickstart.client_app:app" + +[tool.flwr.app.config] +# ServerApp +num-server-rounds = 3 +fraction-fit = 0.1 +fraction-evaluate = 0.1 + +# ClientApp +local-epochs = 1 +params.objective = "binary:logistic" +params.eta = 0.1 # Learning rate +params.max-depth = 8 +params.eval-metric = "auc" +params.nthread = 16 +params.num-parallel-tree = 1 +params.subsample = 1 +params.tree-method = "hist" + +[tool.flwr.federations] +default = "local-simulation" + +[tool.flwr.federations.local-simulation] +options.num-supernodes = 20 diff --git a/examples/xgboost-quickstart/run.sh b/examples/xgboost-quickstart/run.sh deleted file mode 100755 index b35af58222ab..000000000000 --- a/examples/xgboost-quickstart/run.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -e -cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/ - -echo "Starting server" -python server.py & -sleep 5 # Sleep for 5s to give the server enough time to start - -for i in `seq 0 1`; do - echo "Starting client $i" - python3 client.py --partition-id=$i & -done - -# Enable CTRL+C to stop all background processes -trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM -# Wait for all background processes to complete -wait diff --git a/examples/xgboost-quickstart/server.py b/examples/xgboost-quickstart/server.py deleted file mode 100644 index 2246d32686a4..000000000000 --- a/examples/xgboost-quickstart/server.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import Dict - -import flwr as fl -from flwr.server.strategy import FedXgbBagging - -# FL experimental settings -pool_size = 2 -num_rounds = 5 -num_clients_per_round = 2 -num_evaluate_clients = 2 - - -def evaluate_metrics_aggregation(eval_metrics): - """Return an aggregated metric (AUC) for evaluation.""" - total_num = sum([num for num, _ in eval_metrics]) - auc_aggregated = ( - sum([metrics["AUC"] * num for num, metrics in eval_metrics]) / total_num - ) - metrics_aggregated = {"AUC": auc_aggregated} - return metrics_aggregated - - -def config_func(rnd: int) -> Dict[str, str]: - """Return a configuration with global epochs.""" - config = { - "global_round": str(rnd), - } - return config - - -# Define strategy -strategy = FedXgbBagging( - fraction_fit=(float(num_clients_per_round) / pool_size), - min_fit_clients=num_clients_per_round, - min_available_clients=pool_size, - min_evaluate_clients=num_evaluate_clients, - fraction_evaluate=1.0, - evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation, - on_evaluate_config_fn=config_func, - on_fit_config_fn=config_func, -) - -# Start Flower server -fl.server.start_server( - server_address="0.0.0.0:8080", - config=fl.server.ServerConfig(num_rounds=num_rounds), - strategy=strategy, -) diff --git a/examples/xgboost-quickstart/xgboost_quickstart/__init__.py b/examples/xgboost-quickstart/xgboost_quickstart/__init__.py new file mode 100644 index 000000000000..470360b377a6 --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/__init__.py @@ -0,0 +1 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" diff --git a/examples/xgboost-quickstart/xgboost_quickstart/client_app.py b/examples/xgboost-quickstart/xgboost_quickstart/client_app.py new file mode 100644 index 000000000000..3aa199a10274 --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/client_app.py @@ -0,0 +1,139 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" + +import warnings + +from flwr.common.context import Context + +import xgboost as xgb +from flwr.client import Client, ClientApp +from flwr.common.config import unflatten_dict +from flwr.common import ( + Code, + EvaluateIns, + EvaluateRes, + FitIns, + FitRes, + Parameters, + Status, +) + +from xgboost_quickstart.task import load_data, replace_keys + +warnings.filterwarnings("ignore", category=UserWarning) + + +# Define Flower Client and client_fn +class FlowerClient(Client): + def __init__( + self, + train_dmatrix, + valid_dmatrix, + num_train, + num_val, + num_local_round, + params, + ): + self.train_dmatrix = train_dmatrix + self.valid_dmatrix = valid_dmatrix + self.num_train = num_train + self.num_val = num_val + self.num_local_round = num_local_round + self.params = params + + def _local_boost(self, bst_input): + # Update trees based on local training data. + for i in range(self.num_local_round): + bst_input.update(self.train_dmatrix, bst_input.num_boosted_rounds()) + + # Bagging: extract the last N=num_local_round trees for sever aggregation + bst = bst_input[ + bst_input.num_boosted_rounds() + - self.num_local_round : bst_input.num_boosted_rounds() + ] + + return bst + + def fit(self, ins: FitIns) -> FitRes: + global_round = int(ins.config["global_round"]) + if global_round == 1: + # First round local training + bst = xgb.train( + self.params, + self.train_dmatrix, + num_boost_round=self.num_local_round, + evals=[(self.valid_dmatrix, "validate"), (self.train_dmatrix, "train")], + ) + else: + bst = xgb.Booster(params=self.params) + global_model = bytearray(ins.parameters.tensors[0]) + + # Load global model into booster + bst.load_model(global_model) + + # Local training + bst = self._local_boost(bst) + + # Save model + local_model = bst.save_raw("json") + local_model_bytes = bytes(local_model) + + return FitRes( + status=Status( + code=Code.OK, + message="OK", + ), + parameters=Parameters(tensor_type="", tensors=[local_model_bytes]), + num_examples=self.num_train, + metrics={}, + ) + + def evaluate(self, ins: EvaluateIns) -> EvaluateRes: + # Load global model + bst = xgb.Booster(params=self.params) + para_b = bytearray(ins.parameters.tensors[0]) + bst.load_model(para_b) + + # Run evaluation + eval_results = bst.eval_set( + evals=[(self.valid_dmatrix, "valid")], + iteration=bst.num_boosted_rounds() - 1, + ) + auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) + + return EvaluateRes( + status=Status( + code=Code.OK, + message="OK", + ), + loss=0.0, + num_examples=self.num_val, + metrics={"AUC": auc}, + ) + + +def client_fn(context: Context): + # Load model and data + partition_id = context.node_config["partition-id"] + num_partitions = context.node_config["num-partitions"] + train_dmatrix, valid_dmatrix, num_train, num_val = load_data( + partition_id, num_partitions + ) + + cfg = replace_keys(unflatten_dict(context.run_config)) + num_local_round = cfg["local_epochs"] + + # Return Client instance + return FlowerClient( + train_dmatrix, + valid_dmatrix, + num_train, + num_val, + num_local_round, + cfg["params"], + ) + + +# Flower ClientApp +app = ClientApp( + client_fn, +) diff --git a/examples/xgboost-quickstart/xgboost_quickstart/server_app.py b/examples/xgboost-quickstart/xgboost_quickstart/server_app.py new file mode 100644 index 000000000000..6b81c6caa785 --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/server_app.py @@ -0,0 +1,54 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" + +from typing import Dict + +from flwr.common import Context, Parameters +from flwr.server import ServerApp, ServerAppComponents, ServerConfig +from flwr.server.strategy import FedXgbBagging + + +def evaluate_metrics_aggregation(eval_metrics): + """Return an aggregated metric (AUC) for evaluation.""" + total_num = sum([num for num, _ in eval_metrics]) + auc_aggregated = ( + sum([metrics["AUC"] * num for num, metrics in eval_metrics]) / total_num + ) + metrics_aggregated = {"AUC": auc_aggregated} + return metrics_aggregated + + +def config_func(rnd: int) -> Dict[str, str]: + """Return a configuration with global epochs.""" + config = { + "global_round": str(rnd), + } + return config + + +def server_fn(context: Context): + # Read from config + num_rounds = context.run_config["num-server-rounds"] + fraction_fit = context.run_config["fraction-fit"] + fraction_evaluate = context.run_config["fraction-evaluate"] + + # Init an empty Parameter + parameters = Parameters(tensor_type="", tensors=[]) + + # Define strategy + strategy = FedXgbBagging( + fraction_fit=fraction_fit, + fraction_evaluate=fraction_evaluate, + evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation, + on_evaluate_config_fn=config_func, + on_fit_config_fn=config_func, + initial_parameters=parameters, + ) + config = ServerConfig(num_rounds=num_rounds) + + return ServerAppComponents(strategy=strategy, config=config) + + +# Create ServerApp +app = ServerApp( + server_fn=server_fn, +) diff --git a/examples/xgboost-quickstart/xgboost_quickstart/task.py b/examples/xgboost-quickstart/xgboost_quickstart/task.py new file mode 100644 index 000000000000..09916d9ac04a --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/task.py @@ -0,0 +1,71 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" + +from logging import INFO + +import xgboost as xgb +from flwr.common import log +from flwr_datasets import FederatedDataset +from flwr_datasets.partitioner import IidPartitioner + + +def train_test_split(partition, test_fraction, seed): + """Split the data into train and validation set given split rate.""" + train_test = partition.train_test_split(test_size=test_fraction, seed=seed) + partition_train = train_test["train"] + partition_test = train_test["test"] + + num_train = len(partition_train) + num_test = len(partition_test) + + return partition_train, partition_test, num_train, num_test + + +def transform_dataset_to_dmatrix(data): + """Transform dataset to DMatrix format for xgboost.""" + x = data["inputs"] + y = data["label"] + new_data = xgb.DMatrix(x, label=y) + return new_data + + +fds = None # Cache FederatedDataset + + +def load_data(partition_id, num_clients): + """Load partition HIGGS data.""" + # Only initialize `FederatedDataset` once + global fds + if fds is None: + partitioner = IidPartitioner(num_partitions=num_clients) + fds = FederatedDataset( + dataset="jxie/higgs", + partitioners={"train": partitioner}, + ) + + # Load the partition for this `partition_id` + partition = fds.load_partition(partition_id, split="train") + partition.set_format("numpy") + + # Train/test splitting + train_data, valid_data, num_train, num_val = train_test_split( + partition, test_fraction=0.2, seed=42 + ) + + # Reformat data to DMatrix for xgboost + log(INFO, "Reformatting data...") + train_dmatrix = transform_dataset_to_dmatrix(train_data) + valid_dmatrix = transform_dataset_to_dmatrix(valid_data) + + return train_dmatrix, valid_dmatrix, num_train, num_val + + +def replace_keys(input_dict, match="-", target="_"): + """Recursively replace match string with target string in dictionary keys.""" + new_dict = {} + for key, value in input_dict.items(): + new_key = key.replace(match, target) + if isinstance(value, dict): + new_dict[new_key] = replace_keys(value, match, target) + else: + new_dict[new_key] = value + return new_dict From 1187c707f1894924bfa693d99611cf6f93431835 Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Thu, 5 Sep 2024 17:01:54 +0200 Subject: [PATCH 05/11] fix(framework:skip) Determine flwr version by tag instead of poetry (#4141) Signed-off-by: Robert Steiner --- .github/workflows/framework-release.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/framework-release.yml b/.github/workflows/framework-release.yml index 812d5b1e398e..e608329872de 100644 --- a/.github/workflows/framework-release.yml +++ b/.github/workflows/framework-release.yml @@ -16,6 +16,8 @@ jobs: if: ${{ github.repository == 'adap/flower' }} name: Publish release runs-on: ubuntu-22.04 + outputs: + flwr-version: ${{ steps.publish.outputs.flwr-version }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -26,10 +28,12 @@ jobs: uses: ./.github/actions/bootstrap - name: Get artifacts and publish + id: publish env: GITHUB_REF: ${{ github.ref }} run: | TAG_NAME=$(echo "${GITHUB_REF_NAME}" | cut -c2-) + echo "flwr-version=$TAG_NAME" >> "$GITHUB_OUTPUT" wheel_name="flwr-${TAG_NAME}-py3-none-any.whl" tar_name="flwr-${TAG_NAME}.tar.gz" @@ -67,8 +71,7 @@ jobs: - id: matrix run: | - FLWR_VERSION=$(poetry version -s) - python dev/build-docker-image-matrix.py --flwr-version "${FLWR_VERSION}" > matrix.json + python dev/build-docker-image-matrix.py --flwr-version "${{ needs.publish.outputs.flwr-version }}" > matrix.json echo "matrix=$(cat matrix.json)" >> $GITHUB_OUTPUT build-base-images: From f290fe2d49e12a871c33c0ce465ea1d021a4e863 Mon Sep 17 00:00:00 2001 From: Mohammad Naseri Date: Thu, 5 Sep 2024 22:31:08 +0100 Subject: [PATCH 06/11] refactor(examples) Update fl-dp-sa example (#4137) Co-authored-by: jafermarq --- examples/fl-dp-sa/README.md | 61 +++++++++++++++---- examples/fl-dp-sa/fl_dp_sa/__init__.py | 2 +- examples/fl-dp-sa/fl_dp_sa/client.py | 42 ------------- examples/fl-dp-sa/fl_dp_sa/client_app.py | 50 +++++++++++++++ .../fl_dp_sa/{server.py => server_app.py} | 59 ++++++++++-------- examples/fl-dp-sa/fl_dp_sa/task.py | 39 ++++++------ examples/fl-dp-sa/flower.toml | 13 ---- examples/fl-dp-sa/pyproject.toml | 49 ++++++++++----- examples/fl-dp-sa/requirements.txt | 4 -- 9 files changed, 188 insertions(+), 131 deletions(-) delete mode 100644 examples/fl-dp-sa/fl_dp_sa/client.py create mode 100644 examples/fl-dp-sa/fl_dp_sa/client_app.py rename examples/fl-dp-sa/fl_dp_sa/{server.py => server_app.py} (56%) delete mode 100644 examples/fl-dp-sa/flower.toml delete mode 100644 examples/fl-dp-sa/requirements.txt diff --git a/examples/fl-dp-sa/README.md b/examples/fl-dp-sa/README.md index 65c8a5b18fa8..61a6c80f3556 100644 --- a/examples/fl-dp-sa/README.md +++ b/examples/fl-dp-sa/README.md @@ -1,28 +1,63 @@ --- -tags: [basic, vision, fds] +tags: [DP, SecAgg, vision, fds] dataset: [MNIST] framework: [torch, torchvision] --- -# Example of Flower App with DP and SA +# Flower Example on MNIST with Differential Privacy and Secure Aggregation -This is a simple example that utilizes central differential privacy with client-side fixed clipping and secure aggregation. -Note: This example is designed for a small number of rounds and is intended for demonstration purposes. +This example demonstrates a federated learning setup using the Flower, incorporating central differential privacy (DP) with client-side fixed clipping and secure aggregation (SA). It is intended for a small number of rounds for demonstration purposes. -## Install dependencies +This example is similar to the [quickstart-pytorch example](https://github.com/adap/flower/tree/main/examples/quickstart-pytorch) and extends it by integrating central differential privacy and secure aggregation. For more details on differential privacy and secure aggregation in Flower, please refer to the documentation [here](https://flower.ai/docs/framework/how-to-use-differential-privacy.html) and [here](https://flower.ai/docs/framework/contributor-ref-secure-aggregation-protocols.html). -```bash -# Using pip -pip install . +## Set up the project + +### Clone the project + +Start by cloning the example project: + +```shell +git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/fl-dp-sa . && rm -rf flower && cd fl-dp-sa +``` + +This will create a new directory called `fl-dp-sa` containing the following files: -# Or using Poetry -poetry install +```shell +fl-dp-sa +├── fl_dp_sa +│ ├── client_app.py # Defines your ClientApp +│ ├── server_app.py # Defines your ServerApp +│ └── task.py # Defines your model, training, and data loading +├── pyproject.toml # Project metadata like dependencies and configs +└── README.md ``` -## Run +### Install dependencies and project -The example uses the MNIST dataset with a total of 100 clients, with 20 clients sampled in each round. The hyperparameters for DP and SecAgg are specified in `server.py`. +Install the dependencies defined in `pyproject.toml` as well as the `fl_dp_sa` package. ```shell -flower-simulation --server-app fl_dp_sa.server:app --client-app fl_dp_sa.client:app --num-supernodes 100 +# From a new python environment, run: +pip install -e . +``` + +## Run the project + +You can run your Flower project in both _simulation_ and _deployment_ mode without making changes to the code. If you are starting with Flower, we recommend you using the _simulation_ mode as it requires fewer components to be launched manually. By default, `flwr run` will make use of the Simulation Engine. + +### Run with the Simulation Engine + +```bash +flwr run . +``` + +You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: + +```bash +flwr run . --run-config "noise-multiplier=0.1 clipping-norm=5" ``` + +### Run with the Deployment Engine + +> \[!NOTE\] +> An update to this example will show how to run this Flower project with the Deployment Engine and TLS certificates, or with Docker. diff --git a/examples/fl-dp-sa/fl_dp_sa/__init__.py b/examples/fl-dp-sa/fl_dp_sa/__init__.py index 741260348ab8..c5c9a7e9581c 100644 --- a/examples/fl-dp-sa/fl_dp_sa/__init__.py +++ b/examples/fl-dp-sa/fl_dp_sa/__init__.py @@ -1 +1 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" diff --git a/examples/fl-dp-sa/fl_dp_sa/client.py b/examples/fl-dp-sa/fl_dp_sa/client.py deleted file mode 100644 index b3b02c6e9d61..000000000000 --- a/examples/fl-dp-sa/fl_dp_sa/client.py +++ /dev/null @@ -1,42 +0,0 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" - -from flwr.client import ClientApp, NumPyClient -from flwr.client.mod import fixedclipping_mod, secaggplus_mod - -from fl_dp_sa.task import DEVICE, Net, get_weights, load_data, set_weights, test, train - -# Load model and data (simple CNN, CIFAR-10) -net = Net().to(DEVICE) - - -# Define FlowerClient and client_fn -class FlowerClient(NumPyClient): - def __init__(self, trainloader, testloader) -> None: - self.trainloader = trainloader - self.testloader = testloader - - def fit(self, parameters, config): - set_weights(net, parameters) - results = train(net, self.trainloader, self.testloader, epochs=1, device=DEVICE) - return get_weights(net), len(self.trainloader.dataset), results - - def evaluate(self, parameters, config): - set_weights(net, parameters) - loss, accuracy = test(net, self.testloader) - return loss, len(self.testloader.dataset), {"accuracy": accuracy} - - -def client_fn(cid: str): - """Create and return an instance of Flower `Client`.""" - trainloader, testloader = load_data(partition_id=int(cid)) - return FlowerClient(trainloader, testloader).to_client() - - -# Flower ClientApp -app = ClientApp( - client_fn=client_fn, - mods=[ - secaggplus_mod, - fixedclipping_mod, - ], -) diff --git a/examples/fl-dp-sa/fl_dp_sa/client_app.py b/examples/fl-dp-sa/fl_dp_sa/client_app.py new file mode 100644 index 000000000000..5630d4f4d14f --- /dev/null +++ b/examples/fl-dp-sa/fl_dp_sa/client_app.py @@ -0,0 +1,50 @@ +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" + +import torch +from flwr.client import ClientApp, NumPyClient +from flwr.common import Context +from flwr.client.mod import fixedclipping_mod, secaggplus_mod + +from fl_dp_sa.task import Net, get_weights, load_data, set_weights, test, train + + +class FlowerClient(NumPyClient): + def __init__(self, trainloader, testloader) -> None: + self.net = Net() + self.trainloader = trainloader + self.testloader = testloader + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + def fit(self, parameters, config): + set_weights(self.net, parameters) + results = train( + self.net, + self.trainloader, + self.testloader, + epochs=1, + device=self.device, + ) + return get_weights(self.net), len(self.trainloader.dataset), results + + def evaluate(self, parameters, config): + set_weights(self.net, parameters) + loss, accuracy = test(self.net, self.testloader, self.device) + return loss, len(self.testloader.dataset), {"accuracy": accuracy} + + +def client_fn(context: Context): + partition_id = context.node_config["partition-id"] + trainloader, testloader = load_data( + partition_id=partition_id, num_partitions=context.node_config["num-partitions"] + ) + return FlowerClient(trainloader, testloader).to_client() + + +# Flower ClientApp +app = ClientApp( + client_fn=client_fn, + mods=[ + secaggplus_mod, + fixedclipping_mod, + ], +) diff --git a/examples/fl-dp-sa/fl_dp_sa/server.py b/examples/fl-dp-sa/fl_dp_sa/server_app.py similarity index 56% rename from examples/fl-dp-sa/fl_dp_sa/server.py rename to examples/fl-dp-sa/fl_dp_sa/server_app.py index 3ec0ba757b0d..1704b4942ff8 100644 --- a/examples/fl-dp-sa/fl_dp_sa/server.py +++ b/examples/fl-dp-sa/fl_dp_sa/server_app.py @@ -1,20 +1,22 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" from typing import List, Tuple from flwr.common import Context, Metrics, ndarrays_to_parameters -from flwr.server import Driver, LegacyContext, ServerApp, ServerConfig +from flwr.server import ( + Driver, + LegacyContext, + ServerApp, + ServerConfig, +) from flwr.server.strategy import DifferentialPrivacyClientSideFixedClipping, FedAvg from flwr.server.workflow import DefaultWorkflow, SecAggPlusWorkflow from fl_dp_sa.task import Net, get_weights -# Define metric aggregation function def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: examples = [num_examples for num_examples, _ in metrics] - - # Multiply accuracy of each client by number of examples used train_losses = [num_examples * m["train_loss"] for num_examples, m in metrics] train_accuracies = [ num_examples * m["train_accuracy"] for num_examples, m in metrics @@ -22,7 +24,6 @@ def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: val_losses = [num_examples * m["val_loss"] for num_examples, m in metrics] val_accuracies = [num_examples * m["val_accuracy"] for num_examples, m in metrics] - # Aggregate and return custom metric (weighted average) return { "train_loss": sum(train_losses) / sum(examples), "train_accuracy": sum(train_accuracies) / sum(examples), @@ -31,30 +32,36 @@ def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: } -# Initialize model parameters -ndarrays = get_weights(Net()) -parameters = ndarrays_to_parameters(ndarrays) +app = ServerApp() -# Define strategy -strategy = FedAvg( - fraction_fit=0.2, - fraction_evaluate=0.0, # Disable evaluation for demo purpose - min_fit_clients=20, - min_available_clients=20, - fit_metrics_aggregation_fn=weighted_average, - initial_parameters=parameters, -) -strategy = DifferentialPrivacyClientSideFixedClipping( - strategy, noise_multiplier=0.2, clipping_norm=10, num_sampled_clients=20 -) +@app.main() +def main(driver: Driver, context: Context) -> None: + # Initialize global model + model_weights = get_weights(Net()) + parameters = ndarrays_to_parameters(model_weights) + + # Note: The fraction_fit value is configured based on the DP hyperparameter `num-sampled-clients`. + strategy = FedAvg( + fraction_fit=0.2, + fraction_evaluate=0.0, + min_fit_clients=20, + fit_metrics_aggregation_fn=weighted_average, + initial_parameters=parameters, + ) -app = ServerApp() + noise_multiplier = context.run_config["noise-multiplier"] + clipping_norm = context.run_config["clipping-norm"] + num_sampled_clients = context.run_config["num-sampled-clients"] + strategy = DifferentialPrivacyClientSideFixedClipping( + strategy, + noise_multiplier=noise_multiplier, + clipping_norm=clipping_norm, + num_sampled_clients=num_sampled_clients, + ) -@app.main() -def main(driver: Driver, context: Context) -> None: # Construct the LegacyContext context = LegacyContext( context=context, @@ -65,8 +72,8 @@ def main(driver: Driver, context: Context) -> None: # Create the train/evaluate workflow workflow = DefaultWorkflow( fit_workflow=SecAggPlusWorkflow( - num_shares=7, - reconstruction_threshold=4, + num_shares=context.run_config["num-shares"], + reconstruction_threshold=context.run_config["reconstruction-threshold"], ) ) diff --git a/examples/fl-dp-sa/fl_dp_sa/task.py b/examples/fl-dp-sa/fl_dp_sa/task.py index 5b4fd7dee592..c145cebe1378 100644 --- a/examples/fl-dp-sa/fl_dp_sa/task.py +++ b/examples/fl-dp-sa/fl_dp_sa/task.py @@ -1,24 +1,22 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" from collections import OrderedDict -from logging import INFO import torch import torch.nn as nn import torch.nn.functional as F -from flwr.common.logger import log from flwr_datasets import FederatedDataset +from flwr_datasets.partitioner import IidPartitioner from torch.utils.data import DataLoader from torchvision.transforms import Compose, Normalize, ToTensor -DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +fds = None # Cache FederatedDataset -class Net(nn.Module): - """Model.""" +class Net(nn.Module): def __init__(self) -> None: - super(Net, self).__init__() + super().__init__() self.conv1 = nn.Conv2d(1, 6, 3, padding=1) self.pool = nn.MaxPool2d(2, 2) self.conv2 = nn.Conv2d(6, 16, 5) @@ -36,9 +34,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return self.fc3(x) -def load_data(partition_id): +def load_data(partition_id: int, num_partitions: int): """Load partition MNIST data.""" - fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) + + global fds + if fds is None: + partitioner = IidPartitioner(num_partitions=num_partitions) + fds = FederatedDataset( + dataset="ylecun/mnist", + partitioners={"train": partitioner}, + ) partition = fds.load_partition(partition_id) # Divide data on each node: 80% train, 20% test partition_train_test = partition.train_test_split(test_size=0.2, seed=42) @@ -70,8 +75,8 @@ def train(net, trainloader, valloader, epochs, device): loss.backward() optimizer.step() - train_loss, train_acc = test(net, trainloader) - val_loss, val_acc = test(net, valloader) + train_loss, train_acc = test(net, trainloader, device) + val_loss, val_acc = test(net, valloader, device) results = { "train_loss": train_loss, @@ -82,17 +87,17 @@ def train(net, trainloader, valloader, epochs, device): return results -def test(net, testloader): +def test(net, testloader, device): """Validate the model on the test set.""" - net.to(DEVICE) + net.to(device) criterion = torch.nn.CrossEntropyLoss() correct, loss = 0, 0.0 with torch.no_grad(): for batch in testloader: - images = batch["image"].to(DEVICE) - labels = batch["label"].to(DEVICE) - outputs = net(images.to(DEVICE)) - labels = labels.to(DEVICE) + images = batch["image"].to(device) + labels = batch["label"].to(device) + outputs = net(images.to(device)) + labels = labels.to(device) loss += criterion(outputs, labels).item() correct += (torch.max(outputs.data, 1)[1] == labels).sum().item() accuracy = correct / len(testloader.dataset) diff --git a/examples/fl-dp-sa/flower.toml b/examples/fl-dp-sa/flower.toml deleted file mode 100644 index ea2e98206791..000000000000 --- a/examples/fl-dp-sa/flower.toml +++ /dev/null @@ -1,13 +0,0 @@ -[project] -name = "fl_dp_sa" -version = "1.0.0" -description = "" -license = "Apache-2.0" -authors = [ - "The Flower Authors ", -] -readme = "README.md" - -[flower.components] -serverapp = "fl_dp_sa.server:app" -clientapp = "fl_dp_sa.client:app" diff --git a/examples/fl-dp-sa/pyproject.toml b/examples/fl-dp-sa/pyproject.toml index 1ca343b072d9..fbb463cc1c05 100644 --- a/examples/fl-dp-sa/pyproject.toml +++ b/examples/fl-dp-sa/pyproject.toml @@ -1,21 +1,40 @@ [build-system] -requires = ["poetry-core>=1.4.0"] -build-backend = "poetry.core.masonry.api" +requires = ["hatchling"] +build-backend = "hatchling.build" -[tool.poetry] +[project] name = "fl-dp-sa" -version = "0.1.0" -description = "" +version = "1.0.0" +description = "Central Differential Privacy and Secure Aggregation in Flower" license = "Apache-2.0" -authors = [ - "The Flower Authors ", +dependencies = [ + "flwr[simulation]>=1.11.0", + "flwr-datasets[vision]>=0.3.0", + "torch==2.2.1", + "torchvision==0.17.1", ] -readme = "README.md" -[tool.poetry.dependencies] -python = "^3.9" -# Mandatory dependencies -flwr = { version = "^1.8.0", extras = ["simulation"] } -flwr-datasets = { version = "0.0.2", extras = ["vision"] } -torch = "2.2.1" -torchvision = "0.17.1" +[tool.hatch.build.targets.wheel] +packages = ["."] + +[tool.flwr.app] +publisher = "flwrlabs" + +[tool.flwr.app.components] +serverapp = "fl_dp_sa.server_app:app" +clientapp = "fl_dp_sa.client_app:app" + +[tool.flwr.app.config] +# Parameters for the DP +noise-multiplier = 0.2 +clipping-norm = 10 +num-sampled-clients = 20 +# Parameters for the SecAgg+ protocol +num-shares = 7 +reconstruction-threshold = 4 + +[tool.flwr.federations] +default = "local-simulation" + +[tool.flwr.federations.local-simulation] +options.num-supernodes = 100 \ No newline at end of file diff --git a/examples/fl-dp-sa/requirements.txt b/examples/fl-dp-sa/requirements.txt deleted file mode 100644 index f20b9d71e339..000000000000 --- a/examples/fl-dp-sa/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -flwr[simulation]>=1.8.0 -flwr-datasets[vision]==0.0.2 -torch==2.2.1 -torchvision==0.17.1 From 4bdcf612daa8a86092a7a3f4a683694b6347924f Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Fri, 6 Sep 2024 11:33:12 +0200 Subject: [PATCH 07/11] feat(framework:skip) Allow to flwr via a direct reference (#4144) Signed-off-by: Robert Steiner Co-authored-by: Taner Topal --- src/docker/base/alpine/Dockerfile | 17 +++++++++++++---- src/docker/base/ubuntu/Dockerfile | 15 +++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/docker/base/alpine/Dockerfile b/src/docker/base/alpine/Dockerfile index 3e6a246e53c1..ee1e11b2d070 100644 --- a/src/docker/base/alpine/Dockerfile +++ b/src/docker/base/alpine/Dockerfile @@ -33,6 +33,8 @@ RUN apk add --no-cache \ # require for compiling grpcio on ARM64 g++ \ libffi-dev \ + # required for installing flwr via git + git \ # create virtual env && python -m venv /python/venv @@ -42,12 +44,19 @@ ENV PATH=/python/venv/bin:$PATH # Install specific version of pip, setuptools and flwr ARG PIP_VERSION ARG SETUPTOOLS_VERSION -ARG FLWR_VERSION -ARG FLWR_PACKAGE=flwr RUN pip install -U --no-cache-dir \ pip==${PIP_VERSION} \ - setuptools==${SETUPTOOLS_VERSION} \ - ${FLWR_PACKAGE}==${FLWR_VERSION} + setuptools==${SETUPTOOLS_VERSION} + +ARG FLWR_VERSION +ARG FLWR_VERSION_REF +ARG FLWR_PACKAGE=flwr +# hadolint ignore=DL3013 +RUN if [ -z "${FLWR_VERSION_REF}" ]; then \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}==${FLWR_VERSION}; \ + else \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}@${FLWR_VERSION_REF}; \ + fi FROM python:${PYTHON_VERSION}-${DISTRO}${DISTRO_VERSION} AS base diff --git a/src/docker/base/ubuntu/Dockerfile b/src/docker/base/ubuntu/Dockerfile index ddc662a0ae98..47655b1a52a1 100644 --- a/src/docker/base/ubuntu/Dockerfile +++ b/src/docker/base/ubuntu/Dockerfile @@ -60,12 +60,19 @@ RUN pip install -U --no-cache-dir pip==${PIP_VERSION} setuptools==${SETUPTOOLS_V && python -m venv /python/venv ENV PATH=/python/venv/bin:$PATH -ARG FLWR_VERSION -ARG FLWR_PACKAGE=flwr RUN pip install -U --no-cache-dir \ pip==${PIP_VERSION} \ - setuptools==${SETUPTOOLS_VERSION} \ - ${FLWR_PACKAGE}==${FLWR_VERSION} + setuptools==${SETUPTOOLS_VERSION} + +ARG FLWR_VERSION +ARG FLWR_VERSION_REF +ARG FLWR_PACKAGE=flwr +# hadolint ignore=DL3013 +RUN if [ -z "${FLWR_VERSION_REF}" ]; then \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}==${FLWR_VERSION}; \ + else \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}@${FLWR_VERSION_REF}; \ + fi FROM $DISTRO:$DISTRO_VERSION AS base From a0b4b06c6aabe05910588c985e72c2bada3fa2d0 Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Fri, 6 Sep 2024 12:28:49 +0200 Subject: [PATCH 08/11] feat(framework) Add Docker Compose distributed files (#3924) Signed-off-by: Robert Steiner --- src/docker/distributed/.gitignore | 3 + src/docker/distributed/certs.yml | 6 + src/docker/distributed/client/compose.yml | 128 ++++++++++++++++++++++ src/docker/distributed/server/compose.yml | 67 +++++++++++ 4 files changed, 204 insertions(+) create mode 100644 src/docker/distributed/.gitignore create mode 100644 src/docker/distributed/certs.yml create mode 100644 src/docker/distributed/client/compose.yml create mode 100644 src/docker/distributed/server/compose.yml diff --git a/src/docker/distributed/.gitignore b/src/docker/distributed/.gitignore new file mode 100644 index 000000000000..1a11330c6e95 --- /dev/null +++ b/src/docker/distributed/.gitignore @@ -0,0 +1,3 @@ +superexec-certificates +superlink-certificates +server/state diff --git a/src/docker/distributed/certs.yml b/src/docker/distributed/certs.yml new file mode 100644 index 000000000000..48e157582e40 --- /dev/null +++ b/src/docker/distributed/certs.yml @@ -0,0 +1,6 @@ +services: + gen-certs: + build: + args: + SUPERLINK_IP: ${SUPERLINK_IP:-127.0.0.1} + SUPEREXEC_IP: ${SUPEREXEC_IP:-127.0.0.1} diff --git a/src/docker/distributed/client/compose.yml b/src/docker/distributed/client/compose.yml new file mode 100644 index 000000000000..ef69e40cc425 --- /dev/null +++ b/src/docker/distributed/client/compose.yml @@ -0,0 +1,128 @@ +services: + supernode-1: + image: flwr/supernode:${FLWR_VERSION:-1.11.0} + command: + - --superlink + - ${SUPERLINK_IP:-127.0.0.1}:9092 + - --supernode-address + - 0.0.0.0:9094 + - --isolation + - process + - --node-config + - "partition-id=0 num-partitions=2" + - --root-certificates + - certificates/ca.crt + secrets: + - source: superlink-ca-certfile + target: /app/certificates/ca.crt + + supernode-2: + image: flwr/supernode:${FLWR_VERSION:-1.11.0} + command: + - --superlink + - ${SUPERLINK_IP:-127.0.0.1}:9092 + - --supernode-address + - 0.0.0.0:9095 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" + - --root-certificates + - certificates/ca.crt + secrets: + - source: superlink-ca-certfile + target: /app/certificates/ca.crt + + # uncomment to add another SuperNode + # + # supernode-3: + # image: flwr/supernode:${FLWR_VERSION:-1.11.0} + # command: + # - --superlink + # - ${SUPERLINK_IP:-127.0.0.1}:9092 + # - --supernode-address + # - 0.0.0.0:9096 + # - --isolation + # - process + # - --node-config + # - "partition-id=1 num-partitions=2" + # - --root-certificates + # - certificates/ca.crt + # secrets: + # - source: superlink-ca-certfile + # target: /app/certificates/ca.crt + + clientapp-1: + build: + context: ${PROJECT_DIR:-.} + dockerfile_inline: | + FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} + + WORKDIR /app + COPY --chown=app:app pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-1:9094 + deploy: + resources: + limits: + cpus: "2" + stop_signal: SIGINT + depends_on: + - supernode-1 + + clientapp-2: + build: + context: ${PROJECT_DIR:-.} + dockerfile_inline: | + FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} + + WORKDIR /app + COPY --chown=app:app pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-2:9095 + deploy: + resources: + limits: + cpus: "2" + stop_signal: SIGINT + depends_on: + - supernode-2 + + # uncomment to add another ClientApp + # + # clientapp-3: + # build: + # context: ${PROJECT_DIR:-.} + # dockerfile_inline: | + # FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} + + # WORKDIR /app + # COPY --chown=app:app pyproject.toml . + # RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + # && python -m pip install -U --no-cache-dir . + + # ENTRYPOINT ["flwr-clientapp"] + # command: + # - --supernode + # - supernode-3:9096 + # deploy: + # resources: + # limits: + # cpus: "2" + # stop_signal: SIGINT + # depends_on: + # - supernode-3 + +secrets: + superlink-ca-certfile: + file: ../superlink-certificates/ca.crt diff --git a/src/docker/distributed/server/compose.yml b/src/docker/distributed/server/compose.yml new file mode 100644 index 000000000000..fc6dd6f58717 --- /dev/null +++ b/src/docker/distributed/server/compose.yml @@ -0,0 +1,67 @@ +services: + superlink: + image: flwr/superlink:${FLWR_VERSION:-1.11.0} + command: + - --ssl-ca-certfile=certificates/ca.crt + - --ssl-certfile=certificates/server.pem + - --ssl-keyfile=certificates/server.key + - --database=state/state.db + volumes: + - ./state/:/app/state/:rw + secrets: + - source: superlink-ca-certfile + target: /app/certificates/ca.crt + - source: superlink-certfile + target: /app/certificates/server.pem + - source: superlink-keyfile + target: /app/certificates/server.key + ports: + - 9092:9092 + + superexec: + build: + context: ${PROJECT_DIR:-.} + dockerfile_inline: | + FROM flwr/superexec:${FLWR_VERSION:-1.11.0} + + WORKDIR /app + COPY --chown=app:app pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flower-superexec"] + command: + - --executor + - flwr.superexec.deployment:executor + - --executor-config + - superlink="superlink:9091" root-certificates="certificates/superlink-ca.crt" + - --ssl-ca-certfile=certificates/ca.crt + - --ssl-certfile=certificates/server.pem + - --ssl-keyfile=certificates/server.key + secrets: + - source: superlink-ca-certfile + target: /app/certificates/superlink-ca.crt + - source: superexec-ca-certfile + target: /app/certificates/ca.crt + - source: superexec-certfile + target: /app/certificates/server.pem + - source: superexec-keyfile + target: /app/certificates/server.key + ports: + - 9093:9093 + depends_on: + - superlink + +secrets: + superlink-ca-certfile: + file: ../superlink-certificates/ca.crt + superlink-certfile: + file: ../superlink-certificates/server.pem + superlink-keyfile: + file: ../superlink-certificates/server.key + superexec-ca-certfile: + file: ../superexec-certificates/ca.crt + superexec-certfile: + file: ../superexec-certificates/server.pem + superexec-keyfile: + file: ../superexec-certificates/server.key From 9f51f3e983af448ab3ec0660cc851a4d070c6b53 Mon Sep 17 00:00:00 2001 From: Robert Steiner Date: Fri, 6 Sep 2024 12:38:08 +0200 Subject: [PATCH 09/11] ci(*:skip) New Docker codeowners (#4149) Signed-off-by: Robert Steiner --- .github/CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ccf031344f67..3e314c8d1de5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -35,3 +35,4 @@ README.md @jafermarq @tanertopal @danieljanes /.devcontainer @Robert-Steiner @Moep90 **/Dockerfile @Robert-Steiner @Moep90 **/*.Dockerfile @Robert-Steiner @Moep90 +src/docker @Robert-Steiner @Moep90 From f7b01dda6635a4e23d4a076353af2499f86508fd Mon Sep 17 00:00:00 2001 From: Javier Date: Fri, 6 Sep 2024 14:59:51 +0200 Subject: [PATCH 10/11] docs(framework:skip) Fix passing multiple arguments to --run-config in docs (#4150) --- doc/source/tutorial-quickstart-mlx.rst | 2 +- doc/source/tutorial-quickstart-pytorch.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/tutorial-quickstart-mlx.rst b/doc/source/tutorial-quickstart-mlx.rst index 0999bf44d3b7..675a08502d26 100644 --- a/doc/source/tutorial-quickstart-mlx.rst +++ b/doc/source/tutorial-quickstart-mlx.rst @@ -109,7 +109,7 @@ You can also override the parameters defined in .. code:: shell # Override some arguments - $ flwr run . --run-config num-server-rounds=5,lr=0.05 + $ flwr run . --run-config "num-server-rounds=5 lr=0.05" What follows is an explanation of each component in the project you just created: dataset partition, the model, defining the ``ClientApp`` and diff --git a/doc/source/tutorial-quickstart-pytorch.rst b/doc/source/tutorial-quickstart-pytorch.rst index 4515e8d0eeb5..d00b9efbe16b 100644 --- a/doc/source/tutorial-quickstart-pytorch.rst +++ b/doc/source/tutorial-quickstart-pytorch.rst @@ -108,7 +108,7 @@ You can also override the parameters defined in the .. code:: shell # Override some arguments - $ flwr run . --run-config num-server-rounds=5,local-epochs=3 + $ flwr run . --run-config "num-server-rounds=5 local-epochs=3" What follows is an explanation of each component in the project you just created: dataset partition, the model, defining the ``ClientApp`` and From ceb39033d882353bc1bef014a79a3502db198c9b Mon Sep 17 00:00:00 2001 From: Chong Shen Ng Date: Fri, 6 Sep 2024 14:06:54 +0100 Subject: [PATCH 11/11] ci(*:skip) Refactor client-auth e2e CI (#4148) Co-authored-by: Daniel Nata Nugraha --- .github/workflows/e2e.yml | 2 +- e2e/test_superlink.sh | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 49e5b7bf1b36..53015310dcc1 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -172,7 +172,7 @@ jobs: run: ./../test_superlink.sh bare sqlite - name: Run driver test with client authentication if: ${{ matrix.directory == 'e2e-bare-auth' }} - run: ./../test_superlink.sh bare client-auth + run: ./../test_superlink.sh "${{ matrix.directory }}" client-auth - name: Run reconnection test with SQLite database if: ${{ matrix.directory == 'e2e-bare' }} run: ./../test_reconnection.sh sqlite diff --git a/e2e/test_superlink.sh b/e2e/test_superlink.sh index 684f386bd388..2016f6da1933 100755 --- a/e2e/test_superlink.sh +++ b/e2e/test_superlink.sh @@ -2,7 +2,7 @@ set -e case "$1" in - e2e-bare-https) + e2e-bare-https | e2e-bare-auth) ./generate.sh server_arg="--ssl-ca-certfile certificates/ca.crt --ssl-certfile certificates/server.pem --ssl-keyfile certificates/server.key" client_arg="--root-certificates certificates/ca.crt" @@ -37,14 +37,11 @@ case "$2" in client_auth_2="" ;; client-auth) - ./generate.sh rest_arg_superlink="" rest_arg_supernode="" server_address="127.0.0.1:9092" server_app_address="127.0.0.1:9091" db_arg="--database :flwr-in-memory-state:" - server_arg="--ssl-ca-certfile certificates/ca.crt --ssl-certfile certificates/server.pem --ssl-keyfile certificates/server.key" - client_arg="--root-certificates certificates/ca.crt" server_auth="--auth-list-public-keys keys/client_public_keys.csv --auth-superlink-private-key keys/server_credentials --auth-superlink-public-key keys/server_credentials.pub" client_auth_1="--auth-supernode-private-key keys/client_credentials_1 --auth-supernode-public-key keys/client_credentials_1.pub" client_auth_2="--auth-supernode-private-key keys/client_credentials_2 --auth-supernode-public-key keys/client_credentials_2.pub"