From 4211941eaccd2b021e63b4f950b00d7877f0c54f Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 11 Mar 2024 09:32:43 +0100 Subject: [PATCH 1/2] Add num_partitions property --- datasets/flwr_datasets/partitioner/dirichlet_partitioner.py | 5 +++++ datasets/flwr_datasets/partitioner/iid_partitioner.py | 5 +++++ .../flwr_datasets/partitioner/inner_dirichlet_partitioner.py | 5 +++++ datasets/flwr_datasets/partitioner/natural_id_partitioner.py | 5 +++++ datasets/flwr_datasets/partitioner/partitioner.py | 5 +++++ datasets/flwr_datasets/partitioner/shard_partitioner.py | 5 +++++ datasets/flwr_datasets/partitioner/size_partitioner.py | 5 +++++ 7 files changed, 35 insertions(+) diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py index 5f1df71991bb..aa7c5d5f5d37 100644 --- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py +++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py @@ -132,6 +132,11 @@ def load_partition(self, node_id: int) -> datasets.Dataset: self._determine_node_id_to_indices_if_needed() return self.dataset.select(self._node_id_to_indices[node_id]) + @property + def num_partitions(self) -> int: + """Total number of partitions.""" + return self._num_partitions + def _initialize_alpha( self, alpha: Union[int, float, List[float], NDArrayFloat] ) -> NDArrayFloat: diff --git a/datasets/flwr_datasets/partitioner/iid_partitioner.py b/datasets/flwr_datasets/partitioner/iid_partitioner.py index c72b34f081f2..faa1dfa10615 100644 --- a/datasets/flwr_datasets/partitioner/iid_partitioner.py +++ b/datasets/flwr_datasets/partitioner/iid_partitioner.py @@ -50,3 +50,8 @@ def load_partition(self, node_id: int) -> datasets.Dataset: return self.dataset.shard( num_shards=self._num_partitions, index=node_id, contiguous=True ) + + @property + def num_partitions(self) -> int: + """Total number of partitions.""" + return self._num_partitions diff --git 
a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py index c25a9b059d18..2f3688bf0979 100644 --- a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py +++ b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py @@ -119,6 +119,11 @@ def load_partition(self, node_id: int) -> datasets.Dataset: self._determine_node_id_to_indices_if_needed() return self.dataset.select(self._node_id_to_indices[node_id]) + @property + def num_partitions(self) -> int: + """Total number of partitions.""" + return self._num_partitions + def _initialize_alpha_if_needed( self, alpha: Union[int, float, List[float], NDArrayFloat] ) -> NDArrayFloat: diff --git a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py index b8f28696f3b7..26e4d0e10985 100644 --- a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py +++ b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py @@ -65,6 +65,11 @@ def load_partition(self, node_id: int) -> datasets.Dataset: lambda row: row[self._partition_by] == self._node_id_to_natural_id[node_id] ) + @property + def num_partitions(self) -> int: + """Total number of partitions.""" + return len(self._node_id_to_natural_id) + @property def node_id_to_natural_id(self) -> Dict[int, str]: """Node id to corresponding natural id present. diff --git a/datasets/flwr_datasets/partitioner/partitioner.py b/datasets/flwr_datasets/partitioner/partitioner.py index 92405152efc6..73eb6f4a17b3 100644 --- a/datasets/flwr_datasets/partitioner/partitioner.py +++ b/datasets/flwr_datasets/partitioner/partitioner.py @@ -79,3 +79,8 @@ def is_dataset_assigned(self) -> bool: True if a dataset is assigned, otherwise False. 
""" return self._dataset is not None + + @property + @abstractmethod + def num_partitions(self) -> int: + """Total number of partitions.""" diff --git a/datasets/flwr_datasets/partitioner/shard_partitioner.py b/datasets/flwr_datasets/partitioner/shard_partitioner.py index 7c86570fe487..667f9f064cb2 100644 --- a/datasets/flwr_datasets/partitioner/shard_partitioner.py +++ b/datasets/flwr_datasets/partitioner/shard_partitioner.py @@ -179,6 +179,11 @@ def load_partition(self, node_id: int) -> datasets.Dataset: self._determine_node_id_to_indices_if_needed() return self.dataset.select(self._node_id_to_indices[node_id]) + @property + def num_partitions(self) -> int: + """Total number of partitions.""" + return self._num_partitions + def _determine_node_id_to_indices_if_needed(self) -> None: # pylint: disable=R0914 """Assign sample indices to each node id. diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index 35ca750949ee..07ee34b18fc9 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -84,6 +84,11 @@ def load_partition(self, node_id: int) -> datasets.Dataset: self._determine_node_id_to_indices_if_needed() return self.dataset.select(self._node_id_to_indices[node_id]) + @property + def num_partitions(self) -> int: + """Total number of partitions.""" + return self._num_partitions + @property def node_id_to_size(self) -> Dict[int, int]: """Node id to the number of samples.""" From 02411f77e9e32573261352050248f83821fe0fca Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 11 Mar 2024 10:37:21 +0100 Subject: [PATCH 2/2] Trigger the partitioning in the num_partitions --- datasets/flwr_datasets/partitioner/dirichlet_partitioner.py | 2 ++ .../partitioner/inner_dirichlet_partitioner.py | 6 ++++++ .../flwr_datasets/partitioner/natural_id_partitioner.py | 2 ++ datasets/flwr_datasets/partitioner/shard_partitioner.py | 4 
++++ datasets/flwr_datasets/partitioner/size_partitioner.py | 1 + 5 files changed, 15 insertions(+) diff --git a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py index aa7c5d5f5d37..5271aad74a1e 100644 --- a/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py +++ b/datasets/flwr_datasets/partitioner/dirichlet_partitioner.py @@ -135,6 +135,8 @@ def load_partition(self, node_id: int) -> datasets.Dataset: @property def num_partitions(self) -> int: """Total number of partitions.""" + self._check_num_partitions_correctness_if_needed() + self._determine_node_id_to_indices_if_needed() return self._num_partitions def _initialize_alpha( diff --git a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py index 2f3688bf0979..bf07ab3591f5 100644 --- a/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py +++ b/datasets/flwr_datasets/partitioner/inner_dirichlet_partitioner.py @@ -122,6 +122,12 @@ def load_partition(self, node_id: int) -> datasets.Dataset: @property def num_partitions(self) -> int: """Total number of partitions.""" + self._check_num_partitions_correctness_if_needed() + self._check_partition_sizes_correctness_if_needed() + self._check_the_sum_of_partition_sizes() + self._determine_num_unique_classes_if_needed() + self._alpha = self._initialize_alpha_if_needed(self._initial_alpha) + self._determine_node_id_to_indices_if_needed() return self._num_partitions def _initialize_alpha_if_needed( diff --git a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py index 26e4d0e10985..947501965cc6 100644 --- a/datasets/flwr_datasets/partitioner/natural_id_partitioner.py +++ b/datasets/flwr_datasets/partitioner/natural_id_partitioner.py @@ -68,6 +68,8 @@ def load_partition(self, node_id: int) -> datasets.Dataset: @property def 
num_partitions(self) -> int: """Total number of partitions.""" + if len(self._node_id_to_natural_id) == 0: + self._create_int_node_id_to_natural_id() return len(self._node_id_to_natural_id) @property diff --git a/datasets/flwr_datasets/partitioner/shard_partitioner.py b/datasets/flwr_datasets/partitioner/shard_partitioner.py index 667f9f064cb2..05444f537c8c 100644 --- a/datasets/flwr_datasets/partitioner/shard_partitioner.py +++ b/datasets/flwr_datasets/partitioner/shard_partitioner.py @@ -182,6 +182,10 @@ def load_partition(self, node_id: int) -> datasets.Dataset: @property def num_partitions(self) -> int: """Total number of partitions.""" + self._check_num_partitions_correctness_if_needed() + self._check_possibility_of_partitions_creation() + self._sort_dataset_if_needed() + self._determine_node_id_to_indices_if_needed() return self._num_partitions def _determine_node_id_to_indices_if_needed(self) -> None: # pylint: disable=R0914 diff --git a/datasets/flwr_datasets/partitioner/size_partitioner.py b/datasets/flwr_datasets/partitioner/size_partitioner.py index 07ee34b18fc9..29fc2e5b1add 100644 --- a/datasets/flwr_datasets/partitioner/size_partitioner.py +++ b/datasets/flwr_datasets/partitioner/size_partitioner.py @@ -87,6 +87,7 @@ def load_partition(self, node_id: int) -> datasets.Dataset: @property def num_partitions(self) -> int: """Total number of partitions.""" + self._determine_node_id_to_indices_if_needed() return self._num_partitions @property