Merge branch 'main' into update-url-code

adap · Feb 21, 2024 · 2a77d28 · 2a77d28
2 parents aae2106 + f4672b7
commit 2a77d28
Show file tree

Hide file tree

Showing 45 changed files with 1,249 additions and 382 deletions.
diff --git a/README.md b/README.md
@@ -149,6 +149,7 @@ Other [examples](https://github.com/adap/flower/tree/main/examples):
 - Single-Machine Simulation of Federated Learning Systems ([PyTorch](https://github.com/adap/flower/tree/main/examples/simulation-pytorch)) ([Tensorflow](https://github.com/adap/flower/tree/main/examples/simulation-tensorflow))
 - [Comprehensive Flower+XGBoost](https://github.com/adap/flower/tree/main/examples/xgboost-comprehensive)
 - [Flower through Docker Compose and with Grafana dashboard](https://github.com/adap/flower/tree/main/examples/flower-via-docker-compose)
+- [Flower with KaplanMeierFitter from the lifelines library](https://github.com/adap/flower/tree/main/examples/federated-kaplan-meier-fitter)
 
 ## Community
 

diff --git a/baselines/heterofl/heterofl/models.py b/baselines/heterofl/heterofl/models.py
@@ -62,7 +62,7 @@ def __init__(
         self.blocks = nn.Sequential(*blocks)
 
     def _get_norm(self, j: int):
-        """Return the relavant norm."""
+        """Return the relevant norm."""
         if self.model_config["norm"] == "bn":
             norm = nn.BatchNorm2d(
                 self.model_config["hidden_size"][j],
@@ -85,7 +85,7 @@ def _get_norm(self, j: int):
         return norm
 
     def _get_scale(self):
-        """Return the relavant scaler."""
+        """Return the relevant scaler."""
         if self.model_config["scale"]:
             scaler = _Scaler(self.model_config["rate"])
         else:

diff --git a/e2e/bare/client.py b/e2e/bare/client.py
@@ -3,7 +3,7 @@
 import flwr as fl
 import numpy as np
 
-from flwr.common.configsrecord import ConfigsRecord
+from flwr.common import ConfigsRecord
 
 SUBSET_SIZE = 1000
 STATE_VAR = 'timestamp'

diff --git a/e2e/pytorch/client.py b/e2e/pytorch/client.py
@@ -11,7 +11,7 @@
 from tqdm import tqdm
 
 import flwr as fl
-from flwr.common.configsrecord import ConfigsRecord
+from flwr.common import ConfigsRecord
 
 # #############################################################################
 # 1. Regular PyTorch pipeline: nn.Module, train, test, and DataLoader

diff --git a/examples/federated-kaplan-meier-fitter/README.md b/examples/federated-kaplan-meier-fitter/README.md
@@ -0,0 +1,97 @@
+# Flower Example using KaplanMeierFitter
+
+This is an introductory example on **federated survival analysis** using [Flower](https://flower.dev/)
+and the [lifelines](https://lifelines.readthedocs.io/en/stable/index.html) library.
+
+The aim of this example is to estimate the survival function using the
+[Kaplan-Meier Estimate](https://en.wikipedia.org/wiki/Kaplan%E2%80%93Meier_estimator) implemented in
+the lifelines library (see [KaplanMeierFitter](https://lifelines.readthedocs.io/en/stable/fitters/univariate/KaplanMeierFitter.html#lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter)). The distributed/federated aspect of this example
+is that the clients send their data to the server. You can think of it as a federated analytics example. However, it's worth noting that this procedure violates privacy since the raw data is exchanged.
+
+Finally, many other estimators beyond KaplanMeierFitter can be used with the provided strategy:
+AalenJohansenFitter, GeneralizedGammaFitter, LogLogisticFitter,
+SplineFitter, and WeibullFitter.
+
+We also use the [NaturalIdPartitioner](https://flower.dev/docs/datasets/ref-api/flwr_datasets.partitioner.NaturalIdPartitioner.html#flwr_datasets.partitioner.NaturalIdPartitioner) from [Flower Datasets](https://flower.dev/docs/datasets/) to divide the data according to
+the group it comes from, thereby simulating the division that might occur.
+
+<p style="text-align:center;">
+<img src="_static/survival_function_federated.png" alt="Survival Function" width="600"/>
+</p>
+
+## Project Setup
+
+Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will check out the example for you:
+
+```shell
+$ git clone --depth=1 https://github.com/adap/flower.git _tmp && mv _tmp/examples/federated-kaplan-meier-fitter . && rm -rf _tmp && cd federated-kaplan-meier-fitter
+```
+
+This will create a new directory called `federated-kaplan-meier-fitter` containing the following files:
+
+```shell
+-- pyproject.toml
+-- requirements.txt
+-- client.py
+-- server.py
+-- centralized.py
+-- run.sh
+-- README.md
+```
+
+### Installing Dependencies
+
+Project dependencies (such as `lifelines` and `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
+
+#### Poetry
+
+```shell
+poetry install
+poetry shell
+```
+
+Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
+
+```shell
+poetry run python3 -c "import flwr"
+```
+
+If you don't see any errors you're good to go!
+
+#### pip
+
+Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
+
+```shell
+pip install -r requirements.txt
+```
+
+## Run Federated Survival Analysis with Flower and lifelines's KaplanMeierFitter
+
+Afterwards you are ready to start the Flower server as well as the clients. You can simply start the server in a terminal as follows:
+
+```shell
+$ python3 server.py
+```
+
+Now you are ready to start the Flower clients which will participate in the learning. To do so simply open two more terminal windows and run the following commands.
+
+Start client 1 in the first terminal:
+
+```shell
+$ python3 client.py --node-id 0
+```
+
+Start client 2 in the second terminal:
+
+```shell
+$ python3 client.py --node-id 1
+```
+
+You will see that the server prints the survival function and the median survival time, and saves a plot of the survival function.
+
+You can also check that the results match the centralized version.
+
+```shell
+$ python3 centralized.py
+```
diff --git a/examples/federated-kaplan-meier-fitter/_static/survival_function_centralized.png b/examples/federated-kaplan-meier-fitter/_static/survival_function_centralized.png
diff --git a/examples/federated-kaplan-meier-fitter/_static/survival_function_federated.png b/examples/federated-kaplan-meier-fitter/_static/survival_function_federated.png
diff --git a/examples/federated-kaplan-meier-fitter/centralized.py b/examples/federated-kaplan-meier-fitter/centralized.py
@@ -0,0 +1,16 @@
+import matplotlib.pyplot as plt
+from lifelines import KaplanMeierFitter
+from lifelines.datasets import load_waltons
+
+if __name__ == "__main__":
+    X = load_waltons()
+    fitter = KaplanMeierFitter()
+    fitter.fit(X["T"], X["E"])
+    print("Survival function")
+    print(fitter.survival_function_)
+    print("Mean survival time:")
+    print(fitter.median_survival_time_)
+    fitter.plot_survival_function()
+    plt.title("Survival function of fruit flies (Walton's data)", fontsize=16)
+    plt.savefig("./survival_function_centralized.png", dpi=200)
+    print("Centralized survival function saved.")
diff --git a/examples/federated-kaplan-meier-fitter/client.py b/examples/federated-kaplan-meier-fitter/client.py
@@ -0,0 +1,68 @@
+import argparse
+from typing import Dict, List, Tuple
+
+import flwr as fl
+import numpy as np
+from datasets import Dataset
+from flwr.common import NDArray, NDArrays
+from flwr_datasets.partitioner import NaturalIdPartitioner
+from lifelines.datasets import load_waltons
+
+
+class FlowerClient(fl.client.NumPyClient):
+    """Flower client that holds and sends the events and times data.
+
+    Parameters
+    ----------
+    times: NDArray
+        Times of the `events`.
+    events: NDArray
+        Events represented by 0 - no event, 1 - event occurred.
+
+    Raises
+    ------
+    ValueError
+        If the `times` and `events` are not the same shape.
+    """
+
+    def __init__(self, times: NDArray, events: NDArray):
+        if len(times) != len(events):
+            raise ValueError("The times and events arrays have to be same shape.")
+        self._times = times
+        self._events = events
+
+    def fit(
+        self, parameters: List[np.ndarray], config: Dict[str, str]
+    ) -> Tuple[NDArrays, int, Dict]:
+        return (
+            [self._times, self._events],
+            len(self._times),
+            {},
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Flower")
+    parser.add_argument(
+        "--node-id",
+        type=int,
+        required=True,
+        help="Node id. Each node holds different part of the dataset.",
+    )
+    args = parser.parse_args()
+    partition_id = args.node_id
+
+    # Prepare data
+    X = load_waltons()
+    partitioner = NaturalIdPartitioner(partition_by="group")
+    partitioner.dataset = Dataset.from_pandas(X)
+    partition = partitioner.load_partition(partition_id).to_pandas()
+    events = partition["E"].values
+    times = partition["T"].values
+
+    # Start Flower client
+    client = FlowerClient(times=times, events=events).to_client()
+    fl.client.start_client(
+        server_address="127.0.0.1:8080",
+        client=client,
+    )
diff --git a/examples/federated-kaplan-meier-fitter/pyproject.toml b/examples/federated-kaplan-meier-fitter/pyproject.toml
@@ -0,0 +1,18 @@
+[build-system]
+requires = ["poetry-core>=1.4.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "federated-kaplan-meier-fitter"
+version = "0.1.0"
+description = "Federated Kaplan Meier Fitter with Flower"
+authors = ["The Flower Authors <hello@flower.dev>"]
+maintainers = ["The Flower Authors <hello@flower.dev>"]
+
+[tool.poetry.dependencies]
+python = ">=3.9,<3.11"
+flwr = ">=1.0,<2.0"
+flwr-datasets = ">=0.0.2,<1.0.0"
+numpy = ">=1.23.2"
+pandas = ">=2.0.0"
+lifelines = ">=0.28.0"
diff --git a/examples/federated-kaplan-meier-fitter/requirements.txt b/examples/federated-kaplan-meier-fitter/requirements.txt
@@ -0,0 +1,5 @@
+flwr>=1.0, <2.0
+flwr-datasets>=0.0.2, <1.0.0
+numpy>=1.23.2
+pandas>=2.0.0
+lifelines>=0.28.0
diff --git a/examples/federated-kaplan-meier-fitter/run.sh b/examples/federated-kaplan-meier-fitter/run.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -e
+cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/
+
+echo "Starting server"
+python server.py &
+sleep 3  # Sleep for 3s to give the server enough time to start
+
+for i in $(seq 0 1); do
+    echo "Starting client $i"
+    python client.py --node-id "${i}" &
+done
+
+# This will allow you to use CTRL+C to stop all background processes
+trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM
+# Wait for all background processes to complete
+wait