From 1f4064207b27efad705c23cb675d746d8a31fa49 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 10:46:56 +0000
Subject: [PATCH 01/22] Init

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2b789fc3d623..a3887bc0293c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,7 @@ flower-client-app = "flwr.client.supernode:run_client_app" # Deprecated
 [tool.poetry.dependencies]
 python = "^3.9"
 # Mandatory dependencies
-numpy = "^1.21.0"
+numpy = "^2.0.0"
 grpcio = "^1.60.0,!=1.64.2,!=1.65.1,!=1.65.2,!=1.65.4,!=1.65.5,!=1.66.0,!=1.66.1"
 protobuf = "^4.25.2"
 cryptography = "^42.0.4"

From 7bbca13b5207a67a52acc4da3c47c5b23839c240 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 10:54:34 +0000
Subject: [PATCH 02/22] Change np.float_ to np.float64

---
 datasets/flwr_datasets/common/typing.py | 2 +-
 .../flwr_datasets/partitioner/dirichlet_partitioner_test.py | 4 ++--
 src/py/flwr/common/typing.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/datasets/flwr_datasets/common/typing.py b/datasets/flwr_datasets/common/typing.py
index d6d37b468494..6b76e7b22eea 100644
--- a/datasets/flwr_datasets/common/typing.py
+++ b/datasets/flwr_datasets/common/typing.py
@@ -22,5 +22,5 @@
 NDArray = npt.NDArray[Any]
 NDArrayInt = npt.NDArray[np.int_]
-NDArrayFloat = npt.NDArray[np.float_]
+NDArrayFloat = npt.NDArray[np.float64]
 NDArrays = list[NDArray]
diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
index ed38e8ee2a41..2c8eb0d29c06 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
@@ -20,16 +20,16 @@
 from typing import Union
 
 import numpy as np
+from flwr_datasets.partitioner.dirichlet_partitioner import DirichletPartitioner
 from numpy.typing import NDArray
 from parameterized import parameterized
 
 from datasets import Dataset
-from flwr_datasets.partitioner.dirichlet_partitioner import DirichletPartitioner
 
 
 def _dummy_setup(
     num_partitions: int,
-    alpha: Union[float, NDArray[np.float_]],
+    alpha: Union[float, NDArray[np.float64]],
     num_rows: int,
     partition_by: str,
     self_balancing: bool = True,
diff --git a/src/py/flwr/common/typing.py b/src/py/flwr/common/typing.py
index 6b07fe2c1c38..ec8e092548fb 100644
--- a/src/py/flwr/common/typing.py
+++ b/src/py/flwr/common/typing.py
@@ -24,7 +24,7 @@
 NDArray = npt.NDArray[Any]
 NDArrayInt = npt.NDArray[np.int_]
-NDArrayFloat = npt.NDArray[np.float_]
+NDArrayFloat = npt.NDArray[np.float64]
 NDArrays = list[NDArray]
 
 # The following union type contains Python types corresponding to ProtoBuf types that

From 9c60283333dc30db9578cb48933e3e7007eae938 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 11:33:42 +0000
Subject: [PATCH 03/22] Update can_cast type

---
 src/py/flwr/server/strategy/aggregate.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/py/flwr/server/strategy/aggregate.py b/src/py/flwr/server/strategy/aggregate.py
index 94beacba0087..16a6c1faba96 100644
--- a/src/py/flwr/server/strategy/aggregate.py
+++ b/src/py/flwr/server/strategy/aggregate.py
@@ -48,12 +48,12 @@ def aggregate_inplace(results: list[tuple[ClientProxy, FitRes]]) -> NDArrays:
     num_examples_total = sum(fit_res.num_examples for (_, fit_res) in results)
 
     # Compute scaling factors for each result
-    scaling_factors = [
-        fit_res.num_examples / num_examples_total for _, fit_res in results
-    ]
+    scaling_factors = np.asarray(
+        [fit_res.num_examples / num_examples_total for _, fit_res in results]
+    )
 
     def _try_inplace(
-        x: NDArray, y: Union[NDArray, float], np_binary_op: np.ufunc
+        x: NDArray, y: Union[NDArray, np.float64], np_binary_op: np.ufunc
     ) -> NDArray:
         return (  # type: ignore[no-any-return]
             np_binary_op(x, y, out=x)

From 6021a0f46ebfec4c8f20bef6915ca84c86849627 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 11:44:08 +0000
Subject: [PATCH 04/22] Add flwr_datasets as known_first_party

---
 e2e/docker/client.py | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/e2e/docker/client.py b/e2e/docker/client.py
index 44313c7c3af6..98a343f239f3 100644
--- a/e2e/docker/client.py
+++ b/e2e/docker/client.py
@@ -4,12 +4,12 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from flwr_datasets import FederatedDataset
 from torch.utils.data import DataLoader, Subset
 from torchvision.transforms import Compose, Normalize, ToTensor
 
 from flwr.client import ClientApp, NumPyClient
 from flwr.common import Context
+from flwr_datasets import FederatedDataset
 
 # #############################################################################
 # 1. Regular PyTorch pipeline: nn.Module, train, test, and DataLoader
diff --git a/pyproject.toml b/pyproject.toml
index a3887bc0293c..88aa2ebad211 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -147,7 +147,7 @@ extend_exclude = [
 
 [tool.isort]
 profile = "black"
-known_first_party = ["flwr", "flwr_tool"]
+known_first_party = ["flwr", "flwr_tool", "flwr_datasets"]
 
 [tool.black]
 line-length = 88

From 96572a60d62200f40ace0b432f802ba999a28bd6 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 11:51:12 +0000
Subject: [PATCH 05/22] Revert import sort

---
 .../flwr_datasets/partitioner/dirichlet_partitioner_test.py | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
index 2c8eb0d29c06..693e0d6a5aa6 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
@@ -20,11 +20,11 @@
 from typing import Union
 
 import numpy as np
-from flwr_datasets.partitioner.dirichlet_partitioner import DirichletPartitioner
 from numpy.typing import NDArray
 from parameterized import parameterized
 
 from datasets import Dataset
+from flwr_datasets.partitioner.dirichlet_partitioner import DirichletPartitioner
 
 
 def _dummy_setup(
diff --git a/pyproject.toml b/pyproject.toml
index 88aa2ebad211..a3887bc0293c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -147,7 +147,7 @@ extend_exclude = [
 
 [tool.isort]
 profile = "black"
-known_first_party = ["flwr", "flwr_tool", "flwr_datasets"]
+known_first_party = ["flwr", "flwr_tool"]
 
 [tool.black]
 line-length = 88

From d1faff916856fb9a06848505fc27fe540b8ffc95 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 11:52:02 +0000
Subject: [PATCH 06/22] Revert import sort

---
 e2e/docker/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/e2e/docker/client.py b/e2e/docker/client.py
index 98a343f239f3..44313c7c3af6 100644
--- a/e2e/docker/client.py
+++ b/e2e/docker/client.py
@@ -4,12 +4,12 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from flwr_datasets import FederatedDataset
 from torch.utils.data import DataLoader, Subset
 from torchvision.transforms import Compose, Normalize, ToTensor
 
 from flwr.client import ClientApp, NumPyClient
 from flwr.common import Context
-from flwr_datasets import FederatedDataset
 
 # #############################################################################
 # 1. Regular PyTorch pipeline: nn.Module, train, test, and DataLoader

From 0b2a3cfce30a4f0cac9723c42f1bd039a1fe16f4 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 11:59:27 +0000
Subject: [PATCH 07/22] Upgrade numpy to >=2.0.0 for examples and e2e

---
 e2e/e2e-jax/pyproject.toml | 2 +-
 e2e/e2e-pandas/pyproject.toml | 2 +-
 examples/sklearn-logreg-mnist/pyproject.toml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/e2e/e2e-jax/pyproject.toml b/e2e/e2e-jax/pyproject.toml
index b259f66a7bc3..65d55a243884 100644
--- a/e2e/e2e-jax/pyproject.toml
+++ b/e2e/e2e-jax/pyproject.toml
@@ -12,7 +12,7 @@ dependencies = [
     "jax==0.4.13",
     "jaxlib==0.4.13",
     "scikit-learn>=1.1.1,<2.0.0",
-    "numpy>=1.21.4,<2.0.0",
+    "numpy>=2.0.0",
 ]
 
 [tool.hatch.build.targets.wheel]
diff --git a/e2e/e2e-pandas/pyproject.toml b/e2e/e2e-pandas/pyproject.toml
index f10b05b44756..120e9b8e6d35 100644
--- a/e2e/e2e-pandas/pyproject.toml
+++ b/e2e/e2e-pandas/pyproject.toml
@@ -11,7 +11,7 @@ authors = [{ name = "Ragy Haddad", email = "ragy202@gmail.com" }]
 maintainers = [{ name = "The Flower Authors", email = "hello@flower.ai" }]
 dependencies = [
     "flwr[simulation] @ {root:parent:parent:uri}",
-    "numpy>=1.21.0,<2.0.0",
+    "numpy>=2.0.0",
     "pandas>=2.0.0,<3.0.0",
     "scikit-learn>=1.1.1,<2.0.0",
 ]
diff --git a/examples/sklearn-logreg-mnist/pyproject.toml b/examples/sklearn-logreg-mnist/pyproject.toml
index 75dae57a0a40..73e089eb62db 100644
--- a/examples/sklearn-logreg-mnist/pyproject.toml
+++ b/examples/sklearn-logreg-mnist/pyproject.toml
@@ -14,7 +14,7 @@ authors = [
 dependencies = [
     "flwr[simulation]>=1.12.0",
     "flwr-datasets[vision]>=0.3.0",
-    "numpy<2.0.0",
+    "numpy>=2.0.0",
     "scikit-learn~=1.2.2",
 ]

From 24813a6b76984efc3640773c1ce7dac2b00856c2 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 12:09:49 +0000
Subject: [PATCH 08/22] Fix deps

---
 e2e/e2e-pytorch/pyproject.toml | 4 ++--
 e2e/e2e-scikit-learn/pyproject.toml | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/e2e/e2e-pytorch/pyproject.toml b/e2e/e2e-pytorch/pyproject.toml
index 0e48334693d3..9e2029aecefb 100644
--- a/e2e/e2e-pytorch/pyproject.toml
+++ b/e2e/e2e-pytorch/pyproject.toml
@@ -9,8 +9,8 @@ description = "PyTorch Federated Learning E2E test with Flower"
 license = "Apache-2.0"
 dependencies = [
     "flwr[simulation] @ {root:parent:parent:uri}",
-    "torch>=1.12.0,<2.0.0",
-    "torchvision>=0.14.1,<0.15.0",
+    "torch>=2.5.0,<3.0.0",
+    "torchvision>=0.20.1,<0.21.0",
     "tqdm>=4.63.0,<5.0.0",
 ]
diff --git a/e2e/e2e-scikit-learn/pyproject.toml b/e2e/e2e-scikit-learn/pyproject.toml
index aef9a4a8a00b..03f5540aa15d 100644
--- a/e2e/e2e-scikit-learn/pyproject.toml
+++ b/e2e/e2e-scikit-learn/pyproject.toml
@@ -15,6 +15,7 @@ dependencies = [
     "flwr[simulation,rest] @ {root:parent:parent:uri}",
     "scikit-learn>=1.1.1,<2.0.0",
     "openml>=0.14.0,<0.15.0",
+    "numpy<2.0.0",
 ]
 
 [tool.hatch.build.targets.wheel]

From a38443c2603e3eb2bfe61c7707a65f2c054911d5 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 12:48:23 +0000
Subject: [PATCH 09/22] Use flwr_datasets for e2e scikit_learn

---
 .github/workflows/e2e.yml | 3 --
 .../e2e_scikit_learn/client_app.py | 6 +--
 .../e2e_scikit_learn/utils.py | 48 ++++++++-----------
 e2e/e2e-scikit-learn/pyproject.toml | 2 -
 4 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 5e93da349602..d1e9f24607f3 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -156,9 +156,6 @@ jobs:
 
           - directory: e2e-scikit-learn
             e2e: e2e_scikit_learn
-            dataset: |
-              import openml
-              openml.datasets.get_dataset(554)
 
           - directory: e2e-fastai
             e2e: e2e_fastai
diff --git a/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py b/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py
index cd87aeb15fab..5ee4b0599e07 100644
--- a/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py
+++ b/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py
@@ -8,12 +8,10 @@
 from flwr.client import ClientApp, NumPyClient, start_client
 from flwr.common import Context
 
-# Load MNIST dataset from https://www.openml.org/d/554
-(X_train, y_train), (X_test, y_test) = utils.load_mnist()
-
 # Split train set into 10 partitions and randomly use one for training.
 partition_id = np.random.choice(10)
-(X_train, y_train) = utils.partition(X_train, y_train, 10)[partition_id]
+X_train, X_test, y_train, y_test = utils.load_data(partition_id, num_partitions=10)
+
 # Create LogisticRegression Model
 model = LogisticRegression(
diff --git a/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py b/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py
index b7b202f7b760..a44e0981b94c 100644
--- a/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py
+++ b/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py
@@ -1,7 +1,8 @@
 from typing import List, Tuple, Union
 
 import numpy as np
-import openml
+from flwr_datasets import FederatedDataset
+from flwr_datasets.partitioner import IidPartitioner
 from sklearn.linear_model import LogisticRegression
 
 XY = Tuple[np.ndarray, np.ndarray]
@@ -50,30 +51,23 @@ def set_initial_params(model: LogisticRegression):
     model.intercept_ = np.zeros((n_classes,))
 
 
-def load_mnist() -> Dataset:
-    """Loads the MNIST dataset using OpenML.
+fds = None  # Cache FederatedDataset
 
-    OpenML dataset link: https://www.openml.org/d/554
-    """
-    mnist_openml = openml.datasets.get_dataset(554)
-    Xy, _, _, _ = mnist_openml.get_data(dataset_format="array")
-    X = Xy[:, :-1]  # the last column contains labels
-    y = Xy[:, -1]
-    # First 60000 samples consist of the train set
-    x_train, y_train = X[:1000], y[:1000]
-    x_test, y_test = X[60000:62000], y[60000:62000]
-    return (x_train, y_train), (x_test, y_test)
-
-
-def shuffle(X: np.ndarray, y: np.ndarray) -> XY:
-    """Shuffle X and y."""
-    rng = np.random.default_rng()
-    idx = rng.permutation(len(X))
-    return X[idx], y[idx]
-
-
-def partition(X: np.ndarray, y: np.ndarray, num_partitions: int) -> XYList:
-    """Split X and y into a number of partitions."""
-    return list(
-        zip(np.array_split(X, num_partitions), np.array_split(y, num_partitions))
-    )
+
+def load_data(partition_id: int, num_partitions: int):
+    # Only initialize `FederatedDataset` once
+    global fds
+    if fds is None:
+        partitioner = IidPartitioner(num_partitions=num_partitions)
+        fds = FederatedDataset(
+            dataset="ylecun/mnist",
+            partitioners={"train": partitioner},
+        )
+
+    dataset = fds.load_partition(partition_id, "train").with_format("numpy")
+    X, y = dataset["image"].reshape((len(dataset), -1)), dataset["label"]
+    # Split the on edge data: 80% train, 20% test
+    X_train, X_test = X[: int(0.8 * len(X))], X[int(0.8 * len(X)) :]
+    y_train, y_test = y[: int(0.8 * len(y))], y[int(0.8 * len(y)) :]
+
+    return X_train, X_test, y_train, y_test
diff --git a/e2e/e2e-scikit-learn/pyproject.toml b/e2e/e2e-scikit-learn/pyproject.toml
index 03f5540aa15d..322d918f5c44 100644
--- a/e2e/e2e-scikit-learn/pyproject.toml
+++ b/e2e/e2e-scikit-learn/pyproject.toml
@@ -14,8 +14,6 @@ authors = [
 dependencies = [
     "flwr[simulation,rest] @ {root:parent:parent:uri}",
     "scikit-learn>=1.1.1,<2.0.0",
-    "openml>=0.14.0,<0.15.0",
-    "numpy<2.0.0",
 ]
 
 [tool.hatch.build.targets.wheel]

From 487917010179ef7faf50560652e5ad9a8cf77d53 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 13:10:15 +0000
Subject: [PATCH 10/22] Fix deps in e2e-scikit-learn

---
 e2e/e2e-pytorch-lightning/pyproject.toml | 4 ++--
 e2e/e2e-scikit-learn/pyproject.toml | 1 +
 pyproject.toml | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/e2e/e2e-pytorch-lightning/pyproject.toml b/e2e/e2e-pytorch-lightning/pyproject.toml
index 66ecbb6296d0..efb0eb1bebf1 100644
--- a/e2e/e2e-pytorch-lightning/pyproject.toml
+++ b/e2e/e2e-pytorch-lightning/pyproject.toml
@@ -9,8 +9,8 @@ description = "Federated Learning E2E test with Flower and PyTorch Lightning"
 license = "Apache-2.0"
 dependencies = [
     "flwr[simulation] @ {root:parent:parent:uri}",
-    "pytorch-lightning==2.2.4",
-    "torchvision==0.14.1",
+    "pytorch-lightning==2.4.0",
+    "torchvision>=0.20.1,<0.21.0",
 ]
 
 [tool.hatch.build.targets.wheel]
diff --git a/e2e/e2e-scikit-learn/pyproject.toml b/e2e/e2e-scikit-learn/pyproject.toml
index 322d918f5c44..e2445938a9b4 100644
--- a/e2e/e2e-scikit-learn/pyproject.toml
+++ b/e2e/e2e-scikit-learn/pyproject.toml
@@ -14,6 +14,7 @@ authors = [
 dependencies = [
     "flwr[simulation,rest] @ {root:parent:parent:uri}",
     "scikit-learn>=1.1.1,<2.0.0",
+    "numpy<2.0.0",
 ]
 
 [tool.hatch.build.targets.wheel]
diff --git a/pyproject.toml b/pyproject.toml
index a3887bc0293c..b2a3a79f981c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,7 @@ flower-client-app = "flwr.client.supernode:run_client_app" # Deprecated
 [tool.poetry.dependencies]
 python = "^3.9"
 # Mandatory dependencies
-numpy = "^2.0.0"
+numpy = ">=1.0.0,<3.0.0"
 grpcio = "^1.60.0,!=1.64.2,!=1.65.1,!=1.65.2,!=1.65.4,!=1.65.5,!=1.66.0,!=1.66.1"
 protobuf = "^4.25.2"
 cryptography = "^42.0.4"

From e809904916083d6cc284545f79c9f2e050f1ffdc Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 15:57:36 +0000
Subject: [PATCH 11/22] Add flwr-datasets

---
 e2e/e2e-scikit-learn/pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/e2e/e2e-scikit-learn/pyproject.toml b/e2e/e2e-scikit-learn/pyproject.toml
index e2445938a9b4..85a9745457df 100644
--- a/e2e/e2e-scikit-learn/pyproject.toml
+++ b/e2e/e2e-scikit-learn/pyproject.toml
@@ -13,6 +13,7 @@ authors = [
 ]
 dependencies = [
     "flwr[simulation,rest] @ {root:parent:parent:uri}",
+    "flwr-datasets[vision]>=0.3.0",
     "scikit-learn>=1.1.1,<2.0.0",
     "numpy<2.0.0",
 ]

From 08138cf2551fa9ba2735cb046e768d08dce6d347 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 16:05:08 +0000
Subject: [PATCH 12/22] Restore openml

---
 .github/workflows/e2e.yml | 3 ++
 .../e2e_scikit_learn/client_app.py | 6 ++-
 .../e2e_scikit_learn/utils.py | 48 +++++++++++--------
 e2e/e2e-scikit-learn/pyproject.toml | 1 -
 4 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index d1e9f24607f3..5e93da349602 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -156,6 +156,9 @@ jobs:
 
           - directory: e2e-scikit-learn
             e2e: e2e_scikit_learn
+            dataset: |
+              import openml
+              openml.datasets.get_dataset(554)
 
           - directory: e2e-fastai
             e2e: e2e_fastai
diff --git a/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py b/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py
index 5ee4b0599e07..cd87aeb15fab 100644
--- a/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py
+++ b/e2e/e2e-scikit-learn/e2e_scikit_learn/client_app.py
@@ -8,10 +8,12 @@
 from flwr.client import ClientApp, NumPyClient, start_client
 from flwr.common import Context
 
+# Load MNIST dataset from https://www.openml.org/d/554
+(X_train, y_train), (X_test, y_test) = utils.load_mnist()
+
 # Split train set into 10 partitions and randomly use one for training.
 partition_id = np.random.choice(10)
-X_train, X_test, y_train, y_test = utils.load_data(partition_id, num_partitions=10)
-
+(X_train, y_train) = utils.partition(X_train, y_train, 10)[partition_id]
 # Create LogisticRegression Model
 model = LogisticRegression(
diff --git a/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py b/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py
index a44e0981b94c..b7b202f7b760 100644
--- a/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py
+++ b/e2e/e2e-scikit-learn/e2e_scikit_learn/utils.py
@@ -1,8 +1,7 @@
 from typing import List, Tuple, Union
 
 import numpy as np
-from flwr_datasets import FederatedDataset
-from flwr_datasets.partitioner import IidPartitioner
+import openml
 from sklearn.linear_model import LogisticRegression
 
 XY = Tuple[np.ndarray, np.ndarray]
@@ -51,23 +50,30 @@ def set_initial_params(model: LogisticRegression):
     model.intercept_ = np.zeros((n_classes,))
 
 
-fds = None  # Cache FederatedDataset
+def load_mnist() -> Dataset:
+    """Loads the MNIST dataset using OpenML.
-
-    OpenML dataset link: https://www.openml.org/d/554
+
+    OpenML dataset link: https://www.openml.org/d/554
+    """
+    mnist_openml = openml.datasets.get_dataset(554)
+    Xy, _, _, _ = mnist_openml.get_data(dataset_format="array")
+    X = Xy[:, :-1]  # the last column contains labels
+    y = Xy[:, -1]
+    # First 60000 samples consist of the train set
+    x_train, y_train = X[:1000], y[:1000]
+    x_test, y_test = X[60000:62000], y[60000:62000]
+    return (x_train, y_train), (x_test, y_test)
+
+
+def shuffle(X: np.ndarray, y: np.ndarray) -> XY:
+    """Shuffle X and y."""
+    rng = np.random.default_rng()
+    idx = rng.permutation(len(X))
+    return X[idx], y[idx]
+
+
+def partition(X: np.ndarray, y: np.ndarray, num_partitions: int) -> XYList:
+    """Split X and y into a number of partitions."""
+    return list(
+        zip(np.array_split(X, num_partitions), np.array_split(y, num_partitions))
+    )
diff --git a/e2e/e2e-scikit-learn/pyproject.toml b/e2e/e2e-scikit-learn/pyproject.toml
index 85a9745457df..e2445938a9b4 100644
--- a/e2e/e2e-scikit-learn/pyproject.toml
+++ b/e2e/e2e-scikit-learn/pyproject.toml
@@ -13,7 +13,6 @@ authors = [
 ]
 dependencies = [
     "flwr[simulation,rest] @ {root:parent:parent:uri}",
-    "flwr-datasets[vision]>=0.3.0",
     "scikit-learn>=1.1.1,<2.0.0",
     "numpy<2.0.0",
 ]

From 1b97342554ea0ff3933446a3285125cd96d8d645 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 16:07:05 +0000
Subject: [PATCH 13/22] Use NDArrayFloat

---
 .../flwr_datasets/partitioner/dirichlet_partitioner_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
index 693e0d6a5aa6..2f881fdd63ce 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
@@ -20,7 +20,7 @@
 from typing import Union
 
 import numpy as np
-from numpy.typing import NDArray
+from flwr.common.typing import NDArrayFloat
 from parameterized import parameterized
 
 from datasets import Dataset
@@ -29,7 +29,7 @@ def _dummy_setup(
     num_partitions: int,
-    alpha: Union[float, NDArray[np.float64]],
+    alpha: Union[float, NDArrayFloat],
     num_rows: int,
     partition_by: str,
     self_balancing: bool = True,

From b4bcc6d6e7aa65cc3769d48e1998150f178cc09c Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 16:14:20 +0000
Subject: [PATCH 14/22] Add openml

---
 e2e/e2e-scikit-learn/pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/e2e/e2e-scikit-learn/pyproject.toml b/e2e/e2e-scikit-learn/pyproject.toml
index e2445938a9b4..03f5540aa15d 100644
--- a/e2e/e2e-scikit-learn/pyproject.toml
+++ b/e2e/e2e-scikit-learn/pyproject.toml
@@ -14,6 +14,7 @@ authors = [
 dependencies = [
     "flwr[simulation,rest] @ {root:parent:parent:uri}",
     "scikit-learn>=1.1.1,<2.0.0",
+    "openml>=0.14.0,<0.15.0",
     "numpy<2.0.0",
 ]

From 4eccee36945349086f86f673a7eed111d205bea1 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 16:37:57 +0000
Subject: [PATCH 15/22] Remove flwr typing from flwr datasets

---
 .../flwr_datasets/partitioner/dirichlet_partitioner_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
index 2f881fdd63ce..693e0d6a5aa6 100644
--- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
+++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner_test.py
@@ -20,7 +20,7 @@
 from typing import Union
 
 import numpy as np
-from flwr.common.typing import NDArrayFloat
+from numpy.typing import NDArray
 from parameterized import parameterized
 
 from datasets import Dataset
@@ -29,7 +29,7 @@ def _dummy_setup(
     num_partitions: int,
-    alpha: Union[float, NDArrayFloat],
+    alpha: Union[float, NDArray[np.float64]],
     num_rows: int,
     partition_by: str,
     self_balancing: bool = True,

From 5d52b65842efebbb702356a84f1118ca6a0b7b64 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 20:43:29 +0000
Subject: [PATCH 16/22] Temporary remove

---
 .github/workflows/e2e.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 5e93da349602..fb56eee6a72c 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -154,11 +154,11 @@ jobs:
               from torchvision.datasets import MNIST
               MNIST('./data', download=True)
 
-          - directory: e2e-scikit-learn
-            e2e: e2e_scikit_learn
-            dataset: |
-              import openml
-              openml.datasets.get_dataset(554)
+          # - directory: e2e-scikit-learn
+          #   e2e: e2e_scikit_learn
+          #   dataset: |
+          #     import openml
+          #     openml.datasets.get_dataset(554)
 
           - directory: e2e-fastai
             e2e: e2e_fastai

From efba19458cfafd3f147d0ad596fb8a8d16c293b0 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Wed, 6 Nov 2024 21:41:13 +0000
Subject: [PATCH 17/22] Rollback e2e-jax

---
 e2e/e2e-jax/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/e2e/e2e-jax/pyproject.toml b/e2e/e2e-jax/pyproject.toml
index 65d55a243884..b259f66a7bc3 100644
--- a/e2e/e2e-jax/pyproject.toml
+++ b/e2e/e2e-jax/pyproject.toml
@@ -12,7 +12,7 @@ dependencies = [
     "jax==0.4.13",
     "jaxlib==0.4.13",
     "scikit-learn>=1.1.1,<2.0.0",
-    "numpy>=2.0.0",
+    "numpy>=1.21.4,<2.0.0",
 ]
 
 [tool.hatch.build.targets.wheel]

From 60571caf5a18776ab5396ca7d98711b22b8ad5ee Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Thu, 7 Nov 2024 07:49:30 +0000
Subject: [PATCH 18/22] Reinstate e2e-scikit-learn

---
 .github/workflows/e2e.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index fb56eee6a72c..5e93da349602 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -154,11 +154,11 @@ jobs:
               from torchvision.datasets import MNIST
               MNIST('./data', download=True)
 
-          # - directory: e2e-scikit-learn
-          #   e2e: e2e_scikit_learn
-          #   dataset: |
-          #     import openml
-          #     openml.datasets.get_dataset(554)
+          - directory: e2e-scikit-learn
+            e2e: e2e_scikit_learn
+            dataset: |
+              import openml
+              openml.datasets.get_dataset(554)
 
           - directory: e2e-fastai
             e2e: e2e_fastai

From b38513303c035f5b280ce7e8ceecbd870011f621 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Thu, 7 Nov 2024 10:05:59 +0000
Subject: [PATCH 19/22] Add NumPy 1.26 CI test

---
 .github/workflows/e2e.yml | 56 +++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 5e93da349602..5bd6f95edd81 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -347,3 +347,59 @@ jobs:
         cd tmp-${{ matrix.framework }}
         flwr build
         flwr install *.fab
+
+  numpy:
+    runs-on: ubuntu-22.04
+    timeout-minutes: 10
+    needs: wheel
+    strategy:
+      fail-fast: false
+      matrix:
+        numpy-version: ["1.26"]
+        python-version: ["3.11"]
+        directory: [e2e-bare-auth]
+        connection: [insecure]
+        engine: [deployment-engine, simulation-engine]
+        authentication: [no-auth]
+    name: |
+      NumPy ${{ matrix.numpy-version }} /
+      Python ${{ matrix.python-version }} /
+      ${{ matrix.connection }} /
+      ${{ matrix.authentication }} /
+      ${{ matrix.engine }}
+    defaults:
+      run:
+        working-directory: e2e/${{ matrix.directory }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Bootstrap
+        uses: ./.github/actions/bootstrap
+        with:
+          python-version: ${{ matrix.python-version }}
+          poetry-skip: 'true'
+      - name: Install Flower from repo
+        if: ${{ github.repository != 'adap/flower' || github.event.pull_request.head.repo.fork || github.actor == 'dependabot[bot]' }}
+        working-directory: ./
+        run: |
+          if [[ "${{ matrix.engine }}" == "simulation-engine" ]]; then
+            python -m pip install ".[simulation]" "numpy>=${{ matrix.numpy-version }},<2.0"
+          else
+            python -m pip install . "numpy>=${{ matrix.numpy-version }},<2.0"
+          fi
+      - name: Download and install Flower wheel from artifact store
+        if: ${{ github.repository == 'adap/flower' && !github.event.pull_request.head.repo.fork && github.actor != 'dependabot[bot]' }}
+        run: |
+          # Define base URL for wheel file
+          WHEEL_URL="https://${{ env.ARTIFACT_BUCKET }}/py/${{ needs.wheel.outputs.dir }}/${{ needs.wheel.outputs.short_sha }}/${{ needs.wheel.outputs.whl_path }}"
+          if [[ "${{ matrix.engine }}" == "simulation-engine" ]]; then
+            python -m pip install "flwr[simulation] @ ${WHEEL_URL}" "numpy>=${{ matrix.numpy-version }},<2.0"
+          else
+            python -m pip install "${WHEEL_URL}" "numpy>=${{ matrix.numpy-version }},<2.0"
+          fi
+      - name: >
+          Run Flower - NumPy 1.26 test /
+          ${{ matrix.connection }} /
+          ${{ matrix.authentication }} /
+          ${{ matrix.engine }}
+        working-directory: e2e/${{ matrix.directory }}
+        run: ./../test_exec_api.sh "${{ matrix.connection }}" "${{ matrix.authentication}}" "${{ matrix.engine }}"

From ff86b2b1903858381b2e2cc5cf06678a02eec691 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Thu, 7 Nov 2024 11:20:43 +0000
Subject: [PATCH 20/22] Bump lower range of NumPy

---
 datasets/pyproject.toml | 2 +-
 pyproject.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/datasets/pyproject.toml b/datasets/pyproject.toml
index af7c1f1bde2a..d1cd7ab7868c 100644
--- a/datasets/pyproject.toml
+++ b/datasets/pyproject.toml
@@ -52,7 +52,7 @@ exclude = [
 
 [tool.poetry.dependencies]
 python = "^3.9"
-numpy = "^1.21.0"
+numpy = ">=1.26.0,<3.0.0"
 datasets = ">=2.14.6 <=3.1.0"
 pillow = { version = ">=6.2.1", optional = true }
 soundfile = { version = ">=0.12.1", optional = true }
diff --git a/pyproject.toml b/pyproject.toml
index b2a3a79f981c..f6ffa3dbadaf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,7 @@ flower-client-app = "flwr.client.supernode:run_client_app" # Deprecated
 [tool.poetry.dependencies]
 python = "^3.9"
 # Mandatory dependencies
-numpy = ">=1.0.0,<3.0.0"
+numpy = ">=1.26.0,<3.0.0"
 grpcio = "^1.60.0,!=1.64.2,!=1.65.1,!=1.65.2,!=1.65.4,!=1.65.5,!=1.66.0,!=1.66.1"
 protobuf = "^4.25.2"
 cryptography = "^42.0.4"

From 667549adf7404fc4b8eb5f1163c75470908f18e1 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Thu, 7 Nov 2024 11:24:40 +0000
Subject: [PATCH 21/22] Bump tensorboard version with compatibility updates for NumPy 2.0

---
 examples/custom-mods/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/custom-mods/pyproject.toml b/examples/custom-mods/pyproject.toml
index ff36398ef157..429a7c2f1b9c 100644
--- a/examples/custom-mods/pyproject.toml
+++ b/examples/custom-mods/pyproject.toml
@@ -11,7 +11,7 @@ authors = ["The Flower Authors "]
 [tool.poetry.dependencies]
 python = ">=3.9,<3.11"
 flwr = { path = "../../", develop = true, extras = ["simulation"] }
-tensorboard = "2.16.2"
+tensorboard = "2.18.0"
 torch = "1.13.1"
 torchvision = "0.14.1"
 tqdm = "4.65.0"

From ddca56e8936803d0076adc6001d4b11424b967f5 Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Thu, 7 Nov 2024 11:51:52 +0000
Subject: [PATCH 22/22] Fix torch and torchvision range for flwr-datasets e2e test

---
 datasets/e2e/pytorch/pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datasets/e2e/pytorch/pyproject.toml b/datasets/e2e/pytorch/pyproject.toml
index 3f1f12d5f4b3..d42409ca1195 100644
--- a/datasets/e2e/pytorch/pyproject.toml
+++ b/datasets/e2e/pytorch/pyproject.toml
@@ -11,6 +11,6 @@ authors = ["The Flower Authors "]
 [tool.poetry.dependencies]
 python = "^3.9"
 flwr-datasets = { path = "./../../", extras = ["vision"] }
-torch = "^1.12.0"
-torchvision = "^0.14.1"
+torch = ">=1.12.0,<3.0.0"
+torchvision = ">=0.19.0,<1.0.0"
 parameterized = "==0.9.0"