adap · gubertoli · Feb 2, 2024 · Feb 2, 2024 · Feb 2, 2024 · Feb 6, 2024
@@ -0,0 +1,12 @@
+.. _federated-analytics:
+
+
+Federated Analytics
+===================
+
+.. meta::
+   :description: Check out this how-to for using Flower to perform Federated Analytics.
+
+Let's build a federated analytics system using Flower!
+
+Please refer to the `full code example <https://github.com/adap/flower/tree/main/examples/federated-analytics>`_ to learn more.
@@ -50,7 +50,6 @@ A learning-oriented series of federated learning tutorials, the best place to st
    tutorial-quickstart-tensorflow
    tutorial-quickstart-huggingface
    tutorial-quickstart-jax
-   tutorial-quickstart-pandas
    tutorial-quickstart-fastai
    tutorial-quickstart-pytorch-lightning
    tutorial-quickstart-mxnet
@@ -93,6 +92,7 @@ Problem-oriented how-to guides show step-by-step how to achieve a specific goal.
    how-to-upgrade-to-flower-1.0
    how-to-use-built-in-middleware-layers
    how-to-run-flower-using-docker
+   how-to-federated-analytics
 
 .. toctree::
    :maxdepth: 1

@@ -0,0 +1,86 @@
+# Flower Federated Analytics Example
+
+This introductory Flower example demonstrates a Federated Analytics application. It will help you understand how to adapt Flower to your Federated Analytics use cases through a custom strategy. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to
+download, partition and preprocess the dataset.
+
+In this example, we use the Iris dataset split between two clients. The subset of each client contains only the features sepal length and sepal width. A federated analytics task is then performed to calculate, for each client and each feature, a 10-bin histogram; those values are then aggregated to obtain a global histogram for sepal length and for sepal width.
+
+To learn more about Federated Analytics you can check [this article](https://ai.googleblog.com/2020/05/federated-analytics-collaborative-data.html) by Google. There is also a previous Flower blog post about [this example](https://flower.dev/blog/2023-01-24-federated-analytics-pandas/).
+
+Running this example in itself is quite easy.
+
+## Project Setup
+
+Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will check out the example for you:
+
+```shell
+$ git clone --depth=1 https://github.com/adap/flower.git _tmp && mv _tmp/examples/federated-analytics . && rm -rf _tmp && cd federated-analytics
+```
+
+This will create a new directory called `federated-analytics` containing the following files:
+
+```shell
+-- pyproject.toml
+-- requirements.txt
+-- client.py
+-- server.py
+-- run.sh
+-- README.md
+```
+
+### Installing Dependencies
+
+Project dependencies (such as `flwr`) are defined in `pyproject.toml` and `requirements.txt`. We recommend [Poetry](https://python-poetry.org/docs/) to install those dependencies and manage your virtual environment ([Poetry installation](https://python-poetry.org/docs/#installation)) or [pip](https://pip.pypa.io/en/latest/development/), but feel free to use a different way of installing dependencies and managing virtual environments if you have other preferences.
+
+#### Poetry
+
+```shell
+poetry install
+poetry shell
+```
+
+Poetry will install all your dependencies in a newly created virtual environment. To verify that everything works correctly you can run the following command:
+
+```shell
+poetry run python3 -c "import flwr"
+```
+
+If you don't see any errors you're good to go!
+
+#### pip
+
+Write the command below in your terminal to install the dependencies according to the configuration file requirements.txt.
+
+```shell
+pip install -r requirements.txt
+```
+
+## Run Federated Analytics with Flower
+
+After all dependencies are installed, you are ready to run this example with the `run.sh` script.
+
+```shell
+$ ./run.sh
+```
+
+If you don't plan on using the `run.sh` script that automates the run, you can simply start the server in a terminal as follows:
+
+```shell
+$ python3 server.py
+```
+
+Now you are ready to start the Flower clients which will participate in the analytics. To do so, simply open two more terminal windows and run the following commands.
+
+Start client 1 in the first terminal:
+
+```shell
+$ python3 client.py --node-id 0
+```
+
+Start client 2 in the second terminal:
+
+```shell
+$ python3 client.py --node-id 1
+```
+
+You will see that the server is printing aggregated statistics about the dataset distributed amongst clients. Have a look at the [Flower Quickstarter documentation](https://flower.dev/docs/quickstart-pandas.html) for a detailed explanation.
@@ -2,7 +2,6 @@
 from typing import Dict, List, Tuple
 
 import numpy as np
-import pandas as pd
 
 import flwr as fl
 
@@ -12,27 +11,27 @@
 column_names = ["sepal_length", "sepal_width"]
 
 
-def compute_hist(df: pd.DataFrame, col_name: str) -> np.ndarray:
-    freqs, _ = np.histogram(df[col_name])
+def compute_hist(column: List[np.ndarray]) -> np.ndarray:
+    freqs, _ = np.histogram(column)  # np.histogram defaults to 10 bins; the bin edges are discarded
     return freqs
 
 
 # Define Flower client
 class FlowerClient(fl.client.NumPyClient):
-    def __init__(self, X: pd.DataFrame):
+    def __init__(self, X: List[np.ndarray]):
         self.X = X
 
     def fit(
         self, parameters: List[np.ndarray], config: Dict[str, str]
     ) -> Tuple[List[np.ndarray], int, Dict]:
         hist_list = []
         # Execute query locally
-        for c in self.X.columns:
-            hist = compute_hist(self.X, c)
+        for column in self.X:  # use the client's own data (self.X), one entry per selected column
+            hist = compute_hist(column)
             hist_list.append(hist)
         return (
             hist_list,
-            len(self.X),
+            len(self.X[0]),  # get the length of one column
             {},
         )
 
@@ -54,9 +53,12 @@ def fit(
     # Load the partition data
     fds = FederatedDataset(dataset="hitorilabs/iris", partitioners={"train": N_CLIENTS})
 
-    dataset = fds.load_partition(partition_id, "train").with_format("pandas")[:]
+    dataset = fds.load_partition(partition_id, "train")
+
+    X = []
     # Use just the specified columns
-    X = dataset[column_names]
+    for column in column_names:
+        X.append(dataset[column])
 
     # Start Flower client
     fl.client.start_client(

@@ -3,9 +3,9 @@ requires = ["poetry-core>=1.4.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-name = "quickstart-pandas"
+name = "federated-analytics"
 version = "0.1.0"
-description = "Pandas Federated Analytics Quickstart with Flower"
+description = "Federated Analytics with Flower"
 authors = ["Ragy Haddad <[email protected]>"]
 maintainers = ["The Flower Authors <[email protected]>"]
 
@@ -14,4 +14,3 @@ python = ">=3.8,<3.11"
 flwr = ">=1.0,<2.0"
 flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" }
 numpy = "1.23.2"
-pandas = "2.0.0"
@@ -1,4 +1,3 @@
 flwr>=1.0, <2.0
 flwr-datasets[vision]>=0.0.2, <1.0.0
 numpy==1.23.2
-pandas==2.0.0