From 53bce1742957723145050217cc33f57c0ac7245a Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 29 Aug 2024 15:41:03 +0200 Subject: [PATCH 1/5] Add examples section to concatenate divisions --- datasets/flwr_datasets/utils.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index 32904ded2861..58dfccca9bd8 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -293,6 +293,29 @@ def concatenate_divisions( ------- concatenated_divisions : Dataset A dataset created as concatenation of the divisions from all partitions. + + Examples + -------- + Use `concatenate_divisions` with division specified as a list. + + >>> from flwr_datasets import FederatedDataset + >>> from flwr_datasets.utils import concatenate_divisions + >>> + >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) + >>> concatenated_divisions = concatenate_divisions( + ... partitioner=fds["train"], partition_division=[0.8, 0.2], division_id=1 + ... ) + + Use `concatenate_divisions` with division specified as a dict. + + >>> from flwr_datasets import FederatedDataset + >>> from flwr_datasets.utils import concatenate_divisions + >>> + >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) + >>> concatenated_divisions = concatenate_divisions( + ... partitioner=fds["train"], partition_division={"train": 0.8, "test": 0.2}, + ... division_id="test" + ... 
) """ _check_division_config_correctness(partition_division) divisions = [] From e73b7f5867fb1d01433bc8d405277de46dc9276b Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 29 Aug 2024 15:54:42 +0200 Subject: [PATCH 2/5] Update formatting and fix code --- datasets/flwr_datasets/utils.py | 47 +++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index 58dfccca9bd8..c14484826787 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -134,23 +134,29 @@ def divide_dataset( Use `divide_dataset` with division specified as a list. >>> from flwr_datasets import FederatedDataset - >>> from flwr_datasets.utils import divide_dataset + >>> from flwr_datasets.utils import concatenate_divisions >>> >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) - >>> partition = fds.load_partition(0) - >>> division = [0.8, 0.2] - >>> train, test = divide_dataset(dataset=partition, division=division) + >>> concatenated_divisions = concatenate_divisions( + ... partitioner=fds.partitioners["train"], + ... partition_division=[0.8, 0.2], + ... division_id=1 + ... ) + >>> print(concatenated_divisions) - Use `divide_dataset` with division specified as a dict. + Use `divide_dataset` with division specified as a dict + (this accomplishes the same goal as the example with a list above). >>> from flwr_datasets import FederatedDataset - >>> from flwr_datasets.utils import divide_dataset + >>> from flwr_datasets.utils import concatenate_divisions >>> >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) - >>> partition = fds.load_partition(0) - >>> division = {"train": 0.8, "test": 0.2} - >>> train_test = divide_dataset(dataset=partition, division=division) - >>> train, test = train_test["train"], train_test["test"] + >>> concatenated_divisions = concatenate_divisions( + ... partitioner=fds.partitioners["train"], + ... 
partition_division={"train": 0.8, "test": 0.2}, + ... division_id="test", + ... ) + >>> print(concatenated_divisions) """ _check_division_config_correctness(division) dataset_length = len(dataset) @@ -268,11 +274,11 @@ def concatenate_divisions( partition_division: Union[List[float], Tuple[float, ...], Dict[str, float]], division_id: Union[int, str], ) -> Dataset: - """Create a dataset by concatenation of all partitions in the same division. + """Create a dataset by concatenation of divisions from all partitions. The divisions are created based on the `partition_division` and accessed based - on the `division_id`. It can be used to create e.g. centralized dataset from - federated on-edge test sets. + on the `division_id`. This function can be used to create e.g. centralized dataset + from federated on-edge test sets. Parameters ---------- @@ -298,13 +304,14 @@ def concatenate_divisions( -------- Use `concatenate_divisions` with division specified as a list. - >>> from flwr_datasets import FederatedDataset - >>> from flwr_datasets.utils import concatenate_divisions - >>> - >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) - >>> concatenated_divisions = concatenate_divisions( - ... partitioner=fds["train"], partition_division=[0.8, 0.2], division_id=1 - ... ) +from flwr_datasets import FederatedDataset +from flwr_datasets.utils import concatenate_divisions + +fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) +concatenated_divisions = concatenate_divisions( + partitioner=fds.partitioners["train"], partition_division=[0.8, 0.2], division_id=1 +) +print(concatenated_divisions) Use `concatenate_divisions` with division specified as a dict. 
From 1fc80ad87fefcfb2e458e9dc4dd9fcbfb5817551 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 29 Aug 2024 15:57:38 +0200 Subject: [PATCH 3/5] Update formatting and fix code --- datasets/flwr_datasets/utils.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index c14484826787..17fc263078a8 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -304,25 +304,30 @@ def concatenate_divisions( -------- Use `concatenate_divisions` with division specified as a list. -from flwr_datasets import FederatedDataset -from flwr_datasets.utils import concatenate_divisions - -fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) -concatenated_divisions = concatenate_divisions( - partitioner=fds.partitioners["train"], partition_division=[0.8, 0.2], division_id=1 -) -print(concatenated_divisions) + >>> from flwr_datasets import FederatedDataset + >>> from flwr_datasets.utils import concatenate_divisions + >>> + >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) + >>> concatenated_divisions = concatenate_divisions( + ... partitioner=fds.partitioners["train"], + ... partition_division=[0.8, 0.2], + ... division_id=1 + ... ) + >>> print(concatenated_divisions) Use `concatenate_divisions` with division specified as a dict. + This accomplishes the same goal as the example with a list above. >>> from flwr_datasets import FederatedDataset >>> from flwr_datasets.utils import concatenate_divisions >>> >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) >>> concatenated_divisions = concatenate_divisions( - ... partitioner=fds["train"], partition_division={"train": 0.8, "test": 0.2}, + ... partitioner=fds.partitioners["train"], + ... partition_division={"train": 0.8, "test": 0.2}, ... division_id="test" ... 
) + >>> print(concatenated_divisions) """ _check_division_config_correctness(partition_division) divisions = [] From 54c19de3105fe5c868eba91aae2028de89162074 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 29 Aug 2024 16:01:19 +0200 Subject: [PATCH 4/5] Fix divide_dataset fnc docs --- datasets/flwr_datasets/utils.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index 17fc263078a8..ac3beae66a9c 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -134,29 +134,23 @@ def divide_dataset( Use `divide_dataset` with division specified as a list. >>> from flwr_datasets import FederatedDataset - >>> from flwr_datasets.utils import concatenate_divisions + >>> from flwr_datasets.utils import divide_dataset >>> >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) - >>> concatenated_divisions = concatenate_divisions( - ... partitioner=fds.partitioners["train"], - ... partition_division=[0.8, 0.2], - ... division_id=1 - ... ) - >>> print(concatenated_divisions) + >>> partition = fds.load_partition(0) + >>> division = [0.8, 0.2] + >>> train, test = divide_dataset(dataset=partition, division=division) Use `divide_dataset` with division specified as a dict (this accomplishes the same goal as the example with a list above). >>> from flwr_datasets import FederatedDataset - >>> from flwr_datasets.utils import concatenate_divisions + >>> from flwr_datasets.utils import divide_dataset >>> - >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) - >>> concatenated_divisions = concatenate_divisions( - ... partitioner=fds.partitioners["train"], - ... partition_division={"train": 0.8, "test": 0.2}, - ... division_id="test", - ... 
) - >>> print(concatenated_divisions) + >>> partition = fds.load_partition(0) + >>> division = {"train": 0.8, "test": 0.2} + >>> train_test = divide_dataset(dataset=partition, division=division) + >>> train, test = train_test["train"], train_test["test"] """ _check_division_config_correctness(division) dataset_length = len(dataset) From c9d8e5d9351f0b498fd29cbb96a49a9699e219e2 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 29 Aug 2024 16:02:03 +0200 Subject: [PATCH 5/5] Fix divide_dataset fnc docs --- datasets/flwr_datasets/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index ac3beae66a9c..98c859b5412f 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -147,6 +147,7 @@ def divide_dataset( >>> from flwr_datasets import FederatedDataset >>> from flwr_datasets.utils import divide_dataset >>> + >>> fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) >>> partition = fds.load_partition(0) >>> division = {"train": 0.8, "test": 0.2} >>> train_test = divide_dataset(dataset=partition, division=division)