From 7a7b7e196c2310ac88a5202584ab9bd38724e42d Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Mon, 30 Sep 2024 11:05:09 +0100 Subject: [PATCH 01/10] refactor(benchmarks) Update Flower Discuss info in FlowerTune LLM Leaderboard readme (#4258) --- benchmarks/flowertune-llm/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/flowertune-llm/README.md b/benchmarks/flowertune-llm/README.md index 45cd8a828a89..8587d0d9754d 100644 --- a/benchmarks/flowertune-llm/README.md +++ b/benchmarks/flowertune-llm/README.md @@ -64,5 +64,5 @@ following the `README.md` in [`evaluation`](https://github.com/adap/flower/tree/ > [!NOTE] -> If you have any questions about running FlowerTune LLM challenges or evaluation, please feel free to make posts at [Flower Discuss](https://discuss.flower.ai) forum, +> If you have any questions about running FlowerTune LLM challenges or evaluation, please feel free to make posts at our dedicated [FlowerTune Category](https://discuss.flower.ai/c/flowertune-llm-leaderboard/) on the [Flower Discuss](https://discuss.flower.ai) forum, or join our [Slack channel](https://flower.ai/join-slack/) to ask questions in the `#flowertune-llm-leaderboard` channel. From 408c4462ac4863f9ef2dd2286653b19611e249c3 Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:07:44 +0200 Subject: [PATCH 02/10] docs(datasets) Move out the HF space embedding to the bottom of the page (#4266) --- datasets/doc/source/index.rst | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/datasets/doc/source/index.rst b/datasets/doc/source/index.rst index 070655550fa1..d6b51fc84ad6 100644 --- a/datasets/doc/source/index.rst +++ b/datasets/doc/source/index.rst @@ -3,14 +3,7 @@ Flower Datasets Flower Datasets (``flwr-datasets``) is a library that enables the quick and easy creation of datasets for federated learning/analytics/evaluation.
It enables heterogeneity (non-iidness) simulation and division of datasets with the preexisting notion of IDs. The library was created by the ``Flower Labs`` team that also created `Flower <https://flower.ai>`_ : A Friendly Federated Learning Framework. -.. raw:: html - - - - +Try out an interactive demo to generate code and visualize heterogeneous divisions at the :ref:`bottom of this page <demo>`. Flower Datasets Framework ------------------------- @@ -142,7 +135,6 @@ What makes Flower Datasets stand out from other libraries? * New custom partitioning schemes (``Partitioner`` subclasses) integrated with the whole ecosystem. - Join the Flower Community ------------------------- @@ -153,3 +145,16 @@ The Flower Community is growing quickly - we're a friendly group of researchers, :shadow: Join us on Slack + +.. _demo: +Demo +---- + +.. raw:: html + + + + From d121f6060643757e529d1fa9185e6d2ab056f0c8 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Mon, 30 Sep 2024 21:07:08 +0100 Subject: [PATCH 03/10] refactor(benchmarks) Update FlowerTune LLM Leaderboard instructions (#4272) --- benchmarks/flowertune-llm/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/flowertune-llm/README.md b/benchmarks/flowertune-llm/README.md index 8587d0d9754d..cab9b9156514 100644 --- a/benchmarks/flowertune-llm/README.md +++ b/benchmarks/flowertune-llm/README.md @@ -13,13 +13,13 @@ As the first step, please register for a Flower account on [flower.ai/login](htt Then, create a new Python environment and install Flower. > [!TIP] -> We recommend using `pyenv` with the `virtualenv` plugin to create your environment. Other managers, such as Conda, will likely work as well. Check the [documentation](https://flower.ai/docs/framework/how-to-install-flower.html) for alternative ways to install Flower. +> We recommend using `pyenv` with the `virtualenv` plugin to create your environment with Python >= 3.10.0. Other managers, such as Conda, will likely work as well.
Check the [documentation](https://flower.ai/docs/framework/how-to-install-flower.html) for alternative ways to install Flower. ```shell pip install flwr ``` -In the new environment, create a new Flower project using the `FlowerTune` template. You will be prompted for a name to give to your project, your username, and for your choice of LLM challenge: +In the new environment, create a new Flower project using the `FlowerTune` template. You will be prompted for a name to give to your app/project, your username, and for your choice of LLM challenge: ```shell flwr new --framework=FlowerTune ``` From d92356f2353a6dce68372fbbb23a1cdd85704ad5 Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Tue, 1 Oct 2024 12:04:25 +0100 Subject: [PATCH 04/10] feat(framework) Add Flower package version/name and gRPC message/module to `GrpcAdapter` metadata (#4275) Co-authored-by: Daniel J. Beutel --- .../client/grpc_rere_client/grpc_adapter.py | 17 ++++++++++++++--- src/py/flwr/common/constant.py | 9 ++++++--- .../grpc_adapter/grpc_adapter_servicer.py | 19 +++++++++++++++++-- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/py/flwr/client/grpc_rere_client/grpc_adapter.py b/src/py/flwr/client/grpc_rere_client/grpc_adapter.py index 3dce14c14956..69ea29d5b7b3 100644 --- a/src/py/flwr/client/grpc_rere_client/grpc_adapter.py +++ b/src/py/flwr/client/grpc_rere_client/grpc_adapter.py @@ -24,10 +24,14 @@ from flwr.common import log from flwr.common.constant import ( + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_NAME_KEY, + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_VERSION_KEY, GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY, + GRPC_ADAPTER_METADATA_MESSAGE_MODULE_KEY, + GRPC_ADAPTER_METADATA_MESSAGE_QUALNAME_KEY, GRPC_ADAPTER_METADATA_SHOULD_EXIT_KEY, ) -from flwr.common.version import package_version +from flwr.common.version import package_name, package_version from flwr.proto.fab_pb2 import GetFabRequest, GetFabResponse # pylint: disable=E0611 from flwr.proto.fleet_pb2 import ( # pylint: 
disable=E0611 CreateNodeRequest, @@ -62,9 +66,16 @@ def _send_and_receive( self, request: GrpcMessage, response_type: type[T], **kwargs: Any ) -> T: # Serialize request + req_cls = request.__class__ container_req = MessageContainer( - metadata={GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY: package_version}, - grpc_message_name=request.__class__.__qualname__, + metadata={ + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_NAME_KEY: package_name, + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_VERSION_KEY: package_version, + GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY: package_version, + GRPC_ADAPTER_METADATA_MESSAGE_MODULE_KEY: req_cls.__module__, + GRPC_ADAPTER_METADATA_MESSAGE_QUALNAME_KEY: req_cls.__qualname__, + }, + grpc_message_name=req_cls.__qualname__, grpc_message_content=request.SerializeToString(), ) diff --git a/src/py/flwr/common/constant.py b/src/py/flwr/common/constant.py index eabe324f41c5..ffd58478aa48 100644 --- a/src/py/flwr/common/constant.py +++ b/src/py/flwr/common/constant.py @@ -60,8 +60,6 @@ # IDs RUN_ID_NUM_BYTES = 8 NODE_ID_NUM_BYTES = 8 -GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY = "flower-version" -GRPC_ADAPTER_METADATA_SHOULD_EXIT_KEY = "should-exit" # Constants for FAB APP_DIR = "apps" @@ -72,8 +70,13 @@ PARTITION_ID_KEY = "partition-id" NUM_PARTITIONS_KEY = "num-partitions" -GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY = "flower-version" +# Constants for keys in `metadata` of `MessageContainer` in `grpc-adapter` +GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_NAME_KEY = "flower-package-name" +GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_VERSION_KEY = "flower-package-version" +GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY = "flower-version" # Deprecated GRPC_ADAPTER_METADATA_SHOULD_EXIT_KEY = "should-exit" +GRPC_ADAPTER_METADATA_MESSAGE_MODULE_KEY = "grpc-message-module" +GRPC_ADAPTER_METADATA_MESSAGE_QUALNAME_KEY = "grpc-message-qualname" class MessageType: diff --git a/src/py/flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py 
b/src/py/flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py index dbfbb236a7e4..75aa6d370511 100644 --- a/src/py/flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +++ b/src/py/flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py @@ -21,7 +21,15 @@ import grpc from google.protobuf.message import Message as GrpcMessage +from flwr.common.constant import ( + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_NAME_KEY, + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_VERSION_KEY, + GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY, + GRPC_ADAPTER_METADATA_MESSAGE_MODULE_KEY, + GRPC_ADAPTER_METADATA_MESSAGE_QUALNAME_KEY, +) from flwr.common.logger import log +from flwr.common.version import package_name, package_version from flwr.proto import grpcadapter_pb2_grpc # pylint: disable=E0611 from flwr.proto.fab_pb2 import GetFabRequest, GetFabResponse # pylint: disable=E0611 from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611 @@ -52,9 +60,16 @@ def _handle( ) -> MessageContainer: req = request_type.FromString(msg_container.grpc_message_content) res = handler(req) + res_cls = res.__class__ return MessageContainer( - metadata={}, - grpc_message_name=res.__class__.__qualname__, + metadata={ + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_NAME_KEY: package_name, + GRPC_ADAPTER_METADATA_FLOWER_PACKAGE_VERSION_KEY: package_version, + GRPC_ADAPTER_METADATA_FLOWER_VERSION_KEY: package_version, + GRPC_ADAPTER_METADATA_MESSAGE_MODULE_KEY: res_cls.__module__, + GRPC_ADAPTER_METADATA_MESSAGE_QUALNAME_KEY: res_cls.__qualname__, + }, + grpc_message_name=res_cls.__qualname__, grpc_message_content=res.SerializeToString(), ) From 2d91b4d348a4762200b7e668c3d017258630abf0 Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Tue, 1 Oct 2024 13:11:49 +0200 Subject: [PATCH 05/10] docs(glossary) Add Flower Datasets glossary entry (#4235) Co-authored-by: Yan Gao --- glossary/flower-datasets.mdx | 27 +++++++++++++++++++++++++++ 1 file 
changed, 27 insertions(+) create mode 100644 glossary/flower-datasets.mdx diff --git a/glossary/flower-datasets.mdx b/glossary/flower-datasets.mdx new file mode 100644 index 000000000000..24537dfe223b --- /dev/null +++ b/glossary/flower-datasets.mdx @@ -0,0 +1,27 @@ +--- +title: "Flower Datasets" +description: "Flower Datasets is a library that enables the creation of datasets for federated learning by partitioning centralized datasets to exhibit heterogeneity or using naturally partitioned datasets." +date: "2024-05-24" +author: + name: "Adam Narożniak" + position: "ML Engineer at Flower Labs" + website: "https://discuss.flower.ai/u/adam.narozniak/summary" +related: + - text: "Flower Datasets documentation" + link: "https://flower.ai/docs/datasets/" + - text: "Flower Datasets GitHub page" + link: "https://github.com/adap/flower/tree/main/datasets" +--- + +Flower Datasets is a library that enables the creation of datasets for federated learning/analytics/evaluation by partitioning centralized datasets to exhibit heterogeneity or using naturally partitioned datasets. It was created by the Flower Labs team, which also created Flower - a Friendly Federated Learning Framework. + +The key features include: +* downloading datasets (HuggingFace `datasets` are used under the hood), +* partitioning (simulate different levels of heterogeneity by using one of the implemented partitioning schemes or create your own), +* creating centralized datasets (easily utilize centralized versions of the datasets), +* reproducibility (repeat the experiments with the same results), +* visualization (display the created partitions), +* ML agnostic (easy integration with all popular ML frameworks). + + +It is a supplementary library to Flower, with which it integrates easily. 
From 44beefb12dbb45836d8609713ed12e0315eb9577 Mon Sep 17 00:00:00 2001 From: Chong Shen Ng Date: Tue, 1 Oct 2024 13:57:29 +0100 Subject: [PATCH 06/10] ci(*:skip) Enable SuperExec CI from fork (#4274) --- .github/workflows/e2e.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index a7c099d8101f..355037668f7f 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -82,6 +82,14 @@ jobs: with: python-version: ${{ matrix.python-version }} poetry-skip: 'true' + - name: Install Flower from repo + if: ${{ github.repository != 'adap/flower' || github.event.pull_request.head.repo.fork || github.actor == 'dependabot[bot]' }} + run: | + if [[ "${{ matrix.engine }}" == "simulation-engine" ]]; then + python -m pip install ".[simulation]" + else + python -m pip install . + fi - name: Download and install Flower wheel from artifact store if: ${{ github.repository == 'adap/flower' && !github.event.pull_request.head.repo.fork && github.actor != 'dependabot[bot]' }} run: | From 1a4da37b6eb565eb2d62e0496d9f385f908e30f8 Mon Sep 17 00:00:00 2001 From: Charles Beauville Date: Tue, 1 Oct 2024 17:35:54 +0200 Subject: [PATCH 07/10] ci(framework) Update typer to ^0.12.5 (#4268) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 81c1369f6552..87059cf5c867 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ protobuf = "^4.25.2" cryptography = "^42.0.4" pycryptodome = "^3.18.0" iterators = "^0.0.2" -typer = { version = "^0.9.0", extras = ["all"] } +typer = "^0.12.5" tomli = "^2.0.1" tomli-w = "^1.0.0" pathspec = "^0.12.1" From 917d4068edb1739630f14fd5455363b153865228 Mon Sep 17 00:00:00 2001 From: Adam Narozniak <51029327+adam-narozniak@users.noreply.github.com> Date: Tue, 1 Oct 2024 22:56:21 +0200 Subject: [PATCH 08/10] fix(datasets) Fix pathological partitioner on string labels (#4253) --- 
.../partitioner/pathological_partitioner.py | 2 +- .../pathological_partitioner_test.py | 26 ++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/datasets/flwr_datasets/partitioner/pathological_partitioner.py b/datasets/flwr_datasets/partitioner/pathological_partitioner.py index 350383f344e7..d114ccbda02f 100644 --- a/datasets/flwr_datasets/partitioner/pathological_partitioner.py +++ b/datasets/flwr_datasets/partitioner/pathological_partitioner.py @@ -225,7 +225,7 @@ def _determine_partition_id_to_unique_labels(self) -> None: if self._class_assignment_mode == "first-deterministic": # if self._first_class_deterministic_assignment: for partition_id in range(self._num_partitions): - label = partition_id % num_unique_classes + label = self._unique_labels[partition_id % num_unique_classes] self._partition_id_to_unique_labels[partition_id].append(label) while ( diff --git a/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py b/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py index 18707a56bd98..5a3b13bb1436 100644 --- a/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py +++ b/datasets/flwr_datasets/partitioner/pathological_partitioner_test.py @@ -18,7 +18,7 @@ import unittest import numpy as np -from parameterized import parameterized +from parameterized import parameterized, parameterized_class import datasets from datasets import Dataset @@ -26,7 +26,10 @@ def _dummy_dataset_setup( - num_samples: int, partition_by: str, num_unique_classes: int + num_samples: int, + partition_by: str, + num_unique_classes: int, + string_partition_by: bool = False, ) -> Dataset: """Create a dummy dataset for testing.""" data = { @@ -35,6 +38,8 @@ def _dummy_dataset_setup( )[:num_samples], "features": np.random.randn(num_samples), } + if string_partition_by: + data[partition_by] = data[partition_by].astype(str) return Dataset.from_dict(data) @@ -51,6 +56,7 @@ def _dummy_heterogeneous_dataset_setup( return 
Dataset.from_dict(data) +@parameterized_class(("string_partition_by",), [(False,), (True,)]) class TestClassConstrainedPartitioner(unittest.TestCase): """Unit tests for PathologicalPartitioner.""" @@ -94,7 +100,8 @@ def test_first_class_deterministic_assignment(self) -> None: Test if all the classes are used (which has to be the case, given num_partitions >= than the number of unique classes). """ - dataset = _dummy_dataset_setup(100, "labels", 10) + partition_by = "labels" + dataset = _dummy_dataset_setup(100, partition_by, 10) partitioner = PathologicalPartitioner( num_partitions=10, partition_by="labels", @@ -103,7 +110,12 @@ def test_first_class_deterministic_assignment(self) -> None: ) partitioner.dataset = dataset partitioner.load_partition(0) - expected_classes = set(range(10)) + expected_classes = set( + range(10) + # pylint: disable=unsubscriptable-object + if isinstance(dataset[partition_by][0], int) + else [str(i) for i in range(10)] + ) actual_classes = set() for pid in range(10): partition = partitioner.load_partition(pid) @@ -141,6 +153,9 @@ def test_deterministic_class_assignment( for i in range(num_classes_per_partition) ] ) + # pylint: disable=unsubscriptable-object + if isinstance(dataset["labels"][0], str): + expected_labels = [str(label) for label in expected_labels] actual_labels = sorted(np.unique(partition["labels"])) self.assertTrue( np.array_equal(expected_labels, actual_labels), @@ -166,6 +181,9 @@ def test_too_many_partitions_for_a_class( "labels": np.array([num_unique_classes - 1] * (num_samples // 2)), "features": np.random.randn(num_samples // 2), } + # pylint: disable=unsubscriptable-object + if isinstance(dataset_1["labels"][0], str): + data["labels"] = data["labels"].astype(str) dataset_2 = Dataset.from_dict(data) dataset = datasets.concatenate_datasets([dataset_1, dataset_2]) From fdfae1b6c7988a3f5ae7dd0007abe8bba9b7edef Mon Sep 17 00:00:00 2001 From: Chong Shen Ng Date: Wed, 2 Oct 2024 08:47:29 +0100 Subject: [PATCH 09/10] 
ci(*:skip) Add working directory in SuperExec CI (#4276) --- .github/workflows/e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 355037668f7f..7ac339aa43c8 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -84,6 +84,7 @@ jobs: poetry-skip: 'true' - name: Install Flower from repo if: ${{ github.repository != 'adap/flower' || github.event.pull_request.head.repo.fork || github.actor == 'dependabot[bot]' }} + working-directory: ./ run: | if [[ "${{ matrix.engine }}" == "simulation-engine" ]]; then python -m pip install ".[simulation]" From f4b2da2cf789f313f9adb2ae451f73f538805c16 Mon Sep 17 00:00:00 2001 From: Chong Shen Ng Date: Wed, 2 Oct 2024 15:55:24 +0100 Subject: [PATCH 10/10] fix(framework) Add Flower path to SuperNode argument (#4278) --- src/py/flwr/client/supernode/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/py/flwr/client/supernode/app.py b/src/py/flwr/client/supernode/app.py index d9af001bba53..ea7613667baa 100644 --- a/src/py/flwr/client/supernode/app.py +++ b/src/py/flwr/client/supernode/app.py @@ -79,6 +79,7 @@ def run_supernode() -> None: node_config=parse_config_args( [args.node_config] if args.node_config else args.node_config ), + flwr_path=args.flwr_dir, isolation=args.isolation, supernode_address=args.supernode_address, )