
Commit

Merge remote-tracking branch 'origin/main' into fds-rename-node-id-to-partition-id
adam-narozniak committed Mar 13, 2024
2 parents 660060f + 8654bfb commit 7f10ebf
Showing 24 changed files with 38 additions and 38 deletions.
4 changes: 2 additions & 2 deletions datasets/README.md
@@ -59,7 +59,7 @@ If you plan to change the type of the dataset to run the code with your ML frame

# Usage

-Flower Datasets exposes the `FederatedDataset` abstraction to represent the dataset needed for federated learning/evaluation/analytics. It has two powerful methods that let you handle the dataset preprocessing: `load_partition(partition_id, split)` and `load_full(split)`.
+Flower Datasets exposes the `FederatedDataset` abstraction to represent the dataset needed for federated learning/evaluation/analytics. It has two powerful methods that let you handle the dataset preprocessing: `load_partition(partition_id, split)` and `load_split(split)`.

Here's a basic quickstart example of how to partition the MNIST dataset:

@@ -71,7 +71,7 @@ mnist_fds = FederatedDataset("mnist", partitioners={"train": 100}
mnist_partition_0 = mnist_fds.load_partition(0, "train")
-centralized_data = mnist_fds.load_full("test")
+centralized_data = mnist_fds.load_split("test")
```
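
To go from a loaded partition to framework-ready data, here is a minimal sketch, assuming the returned objects are standard Hugging Face `datasets.Dataset` instances (as the quickstart above suggests); `train_test_split` and `with_format` are Hugging Face Datasets APIs, not Flower-specific ones:

```python
from flwr_datasets import FederatedDataset

mnist_fds = FederatedDataset("mnist", partitioners={"train": 100})
partition = mnist_fds.load_partition(0, "train")

# Split the client's partition locally and expose it as NumPy arrays.
partition = partition.train_test_split(test_size=0.2, seed=42)
train_np = partition["train"].with_format("numpy")
test_np = partition["test"].with_format("numpy")
```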

For more details, please refer to the specific how-to guides or tutorial. They showcase customization and more advanced features.
2 changes: 1 addition & 1 deletion datasets/doc/source/how-to-use-with-numpy.rst
@@ -9,7 +9,7 @@ Create a ``FederatedDataset``::

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Inspect the names of the features::
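
As a sketch of that inspection, assuming the standard Hugging Face `datasets.Dataset` attributes:

```python
# Column names, e.g. ["img", "label"] for CIFAR-10.
print(partition.column_names)
# The full feature schema, including types.
print(partition.features)
```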

2 changes: 1 addition & 1 deletion datasets/doc/source/how-to-use-with-pytorch.rst
@@ -8,7 +8,7 @@ Standard setup - download the dataset, choose the partitioning::

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Determine the names of the features (you can alternatively do that directly on the Hugging Face website). The name can
vary, e.g. "img" or "image", "label" or "labels"::
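
A hedged sketch of that step, assuming the names turn out to be "img" and "label"; `with_transform` is the Hugging Face Datasets API, and the transform below is illustrative, not the guide's own code:

```python
from torch.utils.data import DataLoader
from torchvision import transforms

to_tensor = transforms.ToTensor()

def apply_transforms(batch):
    # "img" is an assumption; check partition.column_names first.
    batch["img"] = [to_tensor(img) for img in batch["img"]]
    return batch

partition = partition.with_transform(apply_transforms)
dataloader = DataLoader(partition, batch_size=32, shuffle=True)
```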
2 changes: 1 addition & 1 deletion datasets/doc/source/how-to-use-with-tensorflow.rst
@@ -11,7 +11,7 @@ Create a ``FederatedDataset``::

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Inspect the names of the features::
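
After inspecting the names, the split can be handed to TensorFlow with `to_tf_dataset`, as the simulation examples further down also do; a sketch with assumed column names:

```python
# "img"/"label" are assumed names; inspect partition.features first.
tf_dataset = partition.to_tf_dataset(
    columns="img", label_cols="label", batch_size=64, shuffle=True
)
```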

2 changes: 1 addition & 1 deletion datasets/doc/source/tutorial-quickstart.rst
@@ -38,7 +38,7 @@ To iid partition your dataset, choose the split you want to partition and the nu

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Now you're ready to go. You have ten partitions created from the train split of the CIFAR10 dataset and the test split
for the centralized evaluation. We will convert the type of the dataset from Hugging Face's `Dataset` type to the one
12 changes: 6 additions & 6 deletions datasets/flwr_datasets/federated_dataset.py
@@ -83,7 +83,7 @@ class FederatedDataset:
>>> # Load partition for client with ID 10.
>>> partition = mnist_fds.load_partition(10, "train")
>>> # Use test split for centralized evaluation.
->>> centralized = mnist_fds.load_full("test")
+>>> centralized = mnist_fds.load_split("test")
Automatically divide the data returned from `load_partition`
>>> mnist_fds = FederatedDataset(
@@ -131,9 +131,9 @@ def __init__(
self._shuffle = shuffle
self._seed = seed
# _dataset is prepared lazily on the first call to `load_partition`
-# or `load_full`. See _prepare_datasets for more details
+# or `load_split`. See _prepare_datasets for more details
self._dataset: Optional[DatasetDict] = None
-# Indicate if the dataset is prepared for `load_partition` or `load_full`
+# Indicate if the dataset is prepared for `load_partition` or `load_split`
self._dataset_prepared: bool = False
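
The comments above describe lazy preparation; a plausible sketch of the guard that `load_partition` and `load_split` would share (`_ensure_prepared` is a hypothetical name, while `_prepare_datasets` is the helper the comments above reference):

```python
def _ensure_prepared(self) -> None:
    """Download and preprocess the dataset only on first access (sketch)."""
    if not self._dataset_prepared:
        self._prepare_datasets()  # download, resplit, shuffle
        self._dataset_prepared = True
```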

def load_partition(
@@ -144,7 +144,7 @@ def load_partition(
"""Load the partition specified by the idx in the selected split.
The dataset is downloaded only when the first call to `load_partition` or
-`load_full` is made.
+`load_split` is made.
Parameters
----------
@@ -190,11 +190,11 @@ def load_partition(
)
return divided_partition

-def load_full(self, split: str) -> Dataset:
+def load_split(self, split: str) -> Dataset:
"""Load the full split of the dataset.
The dataset is downloaded only when the first call to `load_partition` or
-`load_full` is made.
+`load_split` is made.
Parameters
----------
16 changes: 8 additions & 8 deletions datasets/flwr_datasets/federated_dataset_test.py
@@ -109,12 +109,12 @@ def test_divide_partition_integration_size(
else:
self.assertEqual(len(partition), expected_length)

-def test_load_full(self) -> None:
-"""Test if the load_full works with the correct split name."""
+def test_load_split(self) -> None:
+"""Test if the load_split works with the correct split name."""
dataset_fds = FederatedDataset(
dataset=self.dataset_name, partitioners={"train": 100}
)
-dataset_fds_test = dataset_fds.load_full(self.test_split)
+dataset_fds_test = dataset_fds.load_split(self.test_split)
dataset_test = datasets.load_dataset(self.dataset_name)[self.test_split]
self.assertEqual(len(dataset_fds_test), len(dataset_test))

@@ -158,7 +158,7 @@ def test_resplit_dataset_into_one(self) -> None:
partitioners={"train": 100},
resplitter={"full": ("train", self.test_split)},
)
-full = fds.load_full("full")
+full = fds.load_split("full")
self.assertEqual(dataset_length, len(full))

# pylint: disable=protected-access
@@ -193,7 +193,7 @@ def resplit(dataset: DatasetDict) -> DatasetDict:
fds = FederatedDataset(
dataset=self.dataset_name, partitioners={"train": 100}, resplitter=resplit
)
-full = fds.load_full("full")
+full = fds.load_split("full")
dataset = datasets.load_dataset(self.dataset_name)
dataset_length = sum([len(ds) for ds in dataset.values()])
self.assertEqual(len(full), dataset_length)
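
The body of the `resplit` callable is outside the diff context; a plausible implementation consistent with the assertion above, merging every split into a single "full" split:

```python
from datasets import DatasetDict, concatenate_datasets

def resplit(dataset: DatasetDict) -> DatasetDict:
    # Concatenate all existing splits into one "full" split.
    return DatasetDict(
        {"full": concatenate_datasets(list(dataset.values()))}
    )
```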
@@ -227,7 +227,7 @@ def test_shuffling_applied(self, mock_func: Mock) -> None:
fds = FederatedDataset(
dataset="does-not-matter", partitioners={"train": 10}, shuffle=True, seed=42
)
-train = fds.load_full("train")
+train = fds.load_split("train")
# This should be shuffled
result = train["features"]

@@ -245,7 +245,7 @@ def test_shuffling_not_applied(self, mock_func: Mock) -> None:
partitioners={"train": 10},
shuffle=False,
)
-train = fds.load_full("train")
+train = fds.load_split("train")
# This should not be shuffled
result = train["features"]

@@ -278,7 +278,7 @@ def resplit(dataset: DatasetDict) -> DatasetDict:
resplitter=resplit,
shuffle=True,
)
-train = fds.load_full("train")
+train = fds.load_split("train")
# This should not be shuffled
result = train["features"]

2 changes: 1 addition & 1 deletion doc/source/tutorial-quickstart-xgboost.rst
@@ -884,7 +884,7 @@ After importing all required packages, we define a :code:`main()` function to pe
# Load centralised test set
if args.centralised_eval or args.centralised_eval_client:
log(INFO, "Loading centralised test set...")
-test_data = fds.load_full("test")
+test_data = fds.load_split("test")
test_data.set_format("numpy")
num_test = test_data.shape[0]
test_dmatrix = transform_dataset_to_dmatrix(test_data)
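
`transform_dataset_to_dmatrix` is defined elsewhere in the tutorial; a hedged sketch of what such a helper typically does, with assumed feature/label keys:

```python
import xgboost as xgb

def transform_dataset_to_dmatrix(data):
    # Assumes a NumPy-formatted dataset with an "inputs" feature
    # matrix and a "label" column; adjust the keys to the real schema.
    return xgb.DMatrix(data["inputs"], label=data["label"])
```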
@@ -148,7 +148,7 @@
" partition = partition.train_test_split(train_size=0.8)\n",
" trainloaders.append(DataLoader(partition[\"train\"], batch_size=BATCH_SIZE))\n",
" valloaders.append(DataLoader(partition[\"test\"], batch_size=BATCH_SIZE))\n",
" testset = fds.load_full(\"test\").with_transform(apply_transforms)\n",
" testset = fds.load_split(\"test\").with_transform(apply_transforms)\n",
" testloader = DataLoader(testset, batch_size=BATCH_SIZE)\n",
" return trainloaders, valloaders, testloader\n",
"\n",
2 changes: 1 addition & 1 deletion examples/advanced-pytorch/utils.py
@@ -21,7 +21,7 @@ def load_partition(partition_id, toy: bool = False):

def load_centralized_data():
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
-centralized_data = fds.load_full("test")
+centralized_data = fds.load_split("test")
centralized_data = centralized_data.with_transform(apply_transforms)
return centralized_data

2 changes: 1 addition & 1 deletion examples/advanced-tensorflow/server.py
@@ -47,7 +47,7 @@ def get_evaluate_fn(model):

# Load data here to avoid the overhead of doing it in `evaluate` itself
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
-test = fds.load_full("test")
+test = fds.load_split("test")
test.set_format("numpy")
x_test, y_test = test["img"] / 255.0, test["label"]
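
The remainder of `get_evaluate_fn` falls outside the hunk; a typical server-side evaluation closure for Flower might look like this sketch (the explicit `x_test`/`y_test` parameters are a simplification, not the example's verbatim code):

```python
def get_evaluate_fn(model, x_test, y_test):
    """Return a server-side evaluation function (sketch)."""

    def evaluate(server_round, parameters, config):
        model.set_weights(parameters)  # apply the latest global weights
        loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
        return loss, {"accuracy": accuracy}

    return evaluate
```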

4 changes: 2 additions & 2 deletions examples/custom-metrics/client.py
@@ -17,8 +17,8 @@

# Load data with Flower Datasets (CIFAR-10)
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
-train = fds.load_full("train")
-test = fds.load_full("test")
+train = fds.load_split("train")
+test = fds.load_split("test")

# Using Numpy format
train_np = train.with_format("numpy")
2 changes: 1 addition & 1 deletion examples/embedded-devices/client_pytorch.py
@@ -112,7 +112,7 @@ def apply_transforms(batch):
partition = partition.with_transform(apply_transforms)
trainsets.append(partition["train"])
validsets.append(partition["test"])
-testset = fds.load_full("test")
+testset = fds.load_split("test")
testset = testset.with_transform(apply_transforms)
return trainsets, validsets, testset

2 changes: 1 addition & 1 deletion examples/embedded-devices/client_tf.py
@@ -51,7 +51,7 @@ def prepare_dataset(use_mnist: bool):
)
x_test, y_test = partition["test"][img_key] / 255.0, partition["test"]["label"]
partitions.append(((x_train, y_train), (x_test, y_test)))
-data_centralized = fds.load_full("test")
+data_centralized = fds.load_split("test")
data_centralized.set_format("numpy")
x_centralized = data_centralized[img_key] / 255.0
y_centralized = data_centralized["label"]
2 changes: 1 addition & 1 deletion examples/quickstart-sklearn-tabular/client.py
@@ -28,7 +28,7 @@
dataset = fds.load_partition(partition_id, "train").with_format("pandas")[:]
X = dataset[["petal_length", "petal_width", "sepal_length", "sepal_width"]]
y = dataset["species"]
-unique_labels = fds.load_full("train").unique("species")
+unique_labels = fds.load_split("train").unique("species")
# Split the on edge data: 80% train, 20% test
X_train, X_test = X[: int(0.8 * len(X))], X[int(0.8 * len(X)) :]
y_train, y_test = y[: int(0.8 * len(y))], y[int(0.8 * len(y)) :]
2 changes: 1 addition & 1 deletion examples/simulation-pytorch/sim.ipynb
@@ -197,7 +197,7 @@
"# Download MNIST dataset and partition the \"train\" partition (so one can be assigned to each client)\n",
"mnist_fds = FederatedDataset(dataset=\"mnist\", partitioners={\"train\": NUM_CLIENTS})\n",
"# Let's keep the test set as is, and use it to evaluate the global model on the server\n",
"centralized_testset = mnist_fds.load_full(\"test\")"
"centralized_testset = mnist_fds.load_split(\"test\")"
]
},
{
2 changes: 1 addition & 1 deletion examples/simulation-pytorch/sim.py
@@ -169,7 +169,7 @@ def evaluate(

# Download MNIST dataset and partition it
mnist_fds = FederatedDataset(dataset="mnist", partitioners={"train": NUM_CLIENTS})
-centralized_testset = mnist_fds.load_full("test")
+centralized_testset = mnist_fds.load_split("test")

# Configure the strategy
strategy = fl.server.strategy.FedAvg(
2 changes: 1 addition & 1 deletion examples/simulation-tensorflow/sim.ipynb
@@ -247,7 +247,7 @@
"# Download MNIST dataset and partition it\n",
"mnist_fds = FederatedDataset(dataset=\"mnist\", partitioners={\"train\": NUM_CLIENTS})\n",
"# Get the whole test set for centralised evaluation\n",
"centralized_testset = mnist_fds.load_full(\"test\").to_tf_dataset(\n",
"centralized_testset = mnist_fds.load_split(\"test\").to_tf_dataset(\n",
" columns=\"image\", label_cols=\"label\", batch_size=64\n",
")\n",
"\n",
2 changes: 1 addition & 1 deletion examples/simulation-tensorflow/sim.py
@@ -131,7 +131,7 @@ def evaluate(
# Download MNIST dataset and partition it
mnist_fds = FederatedDataset(dataset="mnist", partitioners={"train": NUM_CLIENTS})
# Get the whole test set for centralised evaluation
-centralized_testset = mnist_fds.load_full("test").to_tf_dataset(
+centralized_testset = mnist_fds.load_split("test").to_tf_dataset(
columns="image", label_cols="label", batch_size=64
)

2 changes: 1 addition & 1 deletion examples/sklearn-logreg-mnist/server.py
@@ -17,7 +17,7 @@ def get_evaluate_fn(model: LogisticRegression):

# Load test data here to avoid the overhead of doing it in `evaluate` itself
fds = FederatedDataset(dataset="mnist", partitioners={"train": 10})
-dataset = fds.load_full("test").with_format("numpy")
+dataset = fds.load_split("test").with_format("numpy")
X_test, y_test = dataset["image"].reshape((len(dataset), -1)), dataset["label"]

# The `evaluate` function will be called after every round
2 changes: 1 addition & 1 deletion examples/vit-finetune/dataset.py
@@ -21,7 +21,7 @@ def get_dataset_with_partitions(num_partitions: int):
dataset="nelorth/oxford-flowers", partitioners={"train": num_partitions}
)

-centralized_testset = ox_flowers_fds.load_full("test")
+centralized_testset = ox_flowers_fds.load_split("test")
return ox_flowers_fds, centralized_testset


2 changes: 1 addition & 1 deletion examples/xgboost-comprehensive/client.py
@@ -43,7 +43,7 @@
if args.centralised_eval:
# Use centralised test set for evaluation
train_data = partition
-valid_data = fds.load_full("test")
+valid_data = fds.load_split("test")
valid_data.set_format("numpy")
num_train = train_data.shape[0]
num_val = valid_data.shape[0]
2 changes: 1 addition & 1 deletion examples/xgboost-comprehensive/server.py
@@ -35,7 +35,7 @@
dataset="jxie/higgs", partitioners={"train": 20}, resplitter=resplit
)
log(INFO, "Loading centralised test set...")
-test_set = fds.load_full("test")
+test_set = fds.load_split("test")
test_set.set_format("numpy")
test_dmatrix = transform_dataset_to_dmatrix(test_set)

2 changes: 1 addition & 1 deletion examples/xgboost-comprehensive/sim.py
@@ -86,7 +86,7 @@ def main():
# Load centralised test set
if args.centralised_eval or args.centralised_eval_client:
log(INFO, "Loading centralised test set...")
-test_data = fds.load_full("test")
+test_data = fds.load_split("test")
test_data.set_format("numpy")
num_test = test_data.shape[0]
test_dmatrix = transform_dataset_to_dmatrix(test_data)
