
Commit

Merge remote-tracking branch 'origin/main' into fds-rename-node-id-to-partition-id
adam-narozniak committed Mar 13, 2024
2 parents 660060f + 8654bfb commit 7f10ebf
Showing 24 changed files with 38 additions and 38 deletions.
4 changes: 2 additions & 2 deletions datasets/README.md
@@ -59,7 +59,7 @@ If you plan to change the type of the dataset to run the code with your ML frame

# Usage

-Flower Datasets exposes the `FederatedDataset` abstraction to represent the dataset needed for federated learning/evaluation/analytics. It has two powerful methods that let you handle the dataset preprocessing: `load_partition(partition_id, split)` and `load_full(split)`.
+Flower Datasets exposes the `FederatedDataset` abstraction to represent the dataset needed for federated learning/evaluation/analytics. It has two powerful methods that let you handle the dataset preprocessing: `load_partition(partition_id, split)` and `load_split(split)`.

Here's a basic quickstart example of how to partition the MNIST dataset:

@@ -71,7 +71,7 @@ mnist_fds = FederatedDataset("mnist", partitioners={"train": 100}
mnist_partition_0 = mnist_fds.load_partition(0, "train")
-centralized_data = mnist_fds.load_full("test")
+centralized_data = mnist_fds.load_split("test")
```
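
To go from a loaded partition to framework-ready data, here is a minimal sketch, assuming the returned objects are standard Hugging Face `datasets.Dataset` instances (as the quickstart above suggests); `train_test_split` and `with_format` are Hugging Face Datasets APIs, not Flower-specific ones:

```python
from flwr_datasets import FederatedDataset

mnist_fds = FederatedDataset("mnist", partitioners={"train": 100})
partition = mnist_fds.load_partition(0, "train")

# Split the client's partition locally and expose it as NumPy arrays.
partition = partition.train_test_split(test_size=0.2, seed=42)
train_np = partition["train"].with_format("numpy")
test_np = partition["test"].with_format("numpy")
```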

For more details, please refer to the specific how-to guides or tutorial. They showcase customization and more advanced features.
2 changes: 1 addition & 1 deletion datasets/doc/source/how-to-use-with-numpy.rst
@@ -9,7 +9,7 @@ Create a ``FederatedDataset``::

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Inspect the names of the features::
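
As a sketch of that inspection, assuming the standard Hugging Face `datasets.Dataset` attributes:

```python
# Column names, e.g. ["img", "label"] for CIFAR-10.
print(partition.column_names)
# The full feature schema, including types.
print(partition.features)
```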

2 changes: 1 addition & 1 deletion datasets/doc/source/how-to-use-with-pytorch.rst
@@ -8,7 +8,7 @@ Standard setup - download the dataset, choose the partitioning::

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Determine the names of the features (you can alternatively do that directly on the Hugging Face website). The name can
vary, e.g. "img" or "image", "label" or "labels"::
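
A hedged sketch of that step, assuming the names turn out to be "img" and "label"; `with_transform` is the Hugging Face Datasets API, and the transform below is illustrative, not the guide's own code:

```python
from torch.utils.data import DataLoader
from torchvision import transforms

to_tensor = transforms.ToTensor()

def apply_transforms(batch):
    # "img" is an assumption; check partition.column_names first.
    batch["img"] = [to_tensor(img) for img in batch["img"]]
    return batch

partition = partition.with_transform(apply_transforms)
dataloader = DataLoader(partition, batch_size=32, shuffle=True)
```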
2 changes: 1 addition & 1 deletion datasets/doc/source/how-to-use-with-tensorflow.rst
@@ -11,7 +11,7 @@ Create a ``FederatedDataset``::

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Inspect the names of the features::
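
After inspecting the names, the split can be handed to TensorFlow with `to_tf_dataset`, as the simulation examples further down also do; a sketch with assumed column names:

```python
# "img"/"label" are assumed names; inspect partition.features first.
tf_dataset = partition.to_tf_dataset(
    columns="img", label_cols="label", batch_size=64, shuffle=True
)
```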

2 changes: 1 addition & 1 deletion datasets/doc/source/tutorial-quickstart.rst
@@ -38,7 +38,7 @@ To iid partition your dataset, choose the split you want to partition and the nu

fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(0, "train")
-centralized_dataset = fds.load_full("test")
+centralized_dataset = fds.load_split("test")

Now you're ready to go. You have ten partitions created from the train split of the CIFAR10 dataset and the test split
for the centralized evaluation. We will convert the type of the dataset from Hugging Face's `Dataset` type to the one
12 changes: 6 additions & 6 deletions datasets/flwr_datasets/federated_dataset.py
@@ -83,7 +83,7 @@ class FederatedDataset:
>>> # Load partition for client with ID 10.
>>> partition = mnist_fds.load_partition(10, "train")
>>> # Use test split for centralized evaluation.
->>> centralized = mnist_fds.load_full("test")
+>>> centralized = mnist_fds.load_split("test")
Automatically divide the data returned from `load_partition`
>>> mnist_fds = FederatedDataset(
@@ -131,9 +131,9 @@ def __init__(
self._shuffle = shuffle
self._seed = seed
# _dataset is prepared lazily on the first call to `load_partition`
-# or `load_full`. See _prepare_datasets for more details
+# or `load_split`. See _prepare_datasets for more details
self._dataset: Optional[DatasetDict] = None
-# Indicate if the dataset is prepared for `load_partition` or `load_full`
+# Indicate if the dataset is prepared for `load_partition` or `load_split`
self._dataset_prepared: bool = False
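
The comments above describe lazy preparation; a plausible sketch of the guard that `load_partition` and `load_split` would share (`_ensure_prepared` is a hypothetical name, while `_prepare_datasets` is the helper the comments above reference):

```python
def _ensure_prepared(self) -> None:
    """Download and preprocess the dataset only on first access (sketch)."""
    if not self._dataset_prepared:
        self._prepare_datasets()  # download, resplit, shuffle
        self._dataset_prepared = True
```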

def load_partition(
@@ -144,7 +144,7 @@ def load_partition(
"""Load the partition specified by the idx in the selected split.
The dataset is downloaded only when the first call to `load_partition` or
-`load_full` is made.
+`load_split` is made.
Parameters
----------
@@ -190,11 +190,11 @@ def load_partition(
)
return divided_partition

-def load_full(self, split: str) -> Dataset:
+def load_split(self, split: str) -> Dataset:
"""Load the full split of the dataset.
The dataset is downloaded only when the first call to `load_partition` or
-`load_full` is made.
+`load_split` is made.
Parameters
----------
16 changes: 8 additions & 8 deletions datasets/flwr_datasets/federated_dataset_test.py
@@ -109,12 +109,12 @@ def test_divide_partition_integration_size(
else:
self.assertEqual(len(partition), expected_length)

-def test_load_full(self) -> None:
-"""Test if the load_full works with the correct split name."""
+def test_load_split(self) -> None:
+"""Test if the load_split works with the correct split name."""
dataset_fds = FederatedDataset(
dataset=self.dataset_name, partitioners={"train": 100}
)
-dataset_fds_test = dataset_fds.load_full(self.test_split)
+dataset_fds_test = dataset_fds.load_split(self.test_split)
dataset_test = datasets.load_dataset(self.dataset_name)[self.test_split]
self.assertEqual(len(dataset_fds_test), len(dataset_test))

@@ -158,7 +158,7 @@ def test_resplit_dataset_into_one(self) -> None:
partitioners={"train": 100},
resplitter={"full": ("train", self.test_split)},
)
-full = fds.load_full("full")
+full = fds.load_split("full")
self.assertEqual(dataset_length, len(full))

# pylint: disable=protected-access
@@ -193,7 +193,7 @@ def resplit(dataset: DatasetDict) -> DatasetDict:
fds = FederatedDataset(
dataset=self.dataset_name, partitioners={"train": 100}, resplitter=resplit
)
-full = fds.load_full("full")
+full = fds.load_split("full")
dataset = datasets.load_dataset(self.dataset_name)
dataset_length = sum([len(ds) for ds in dataset.values()])
self.assertEqual(len(full), dataset_length)
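
The body of the `resplit` callable is outside the diff context; a plausible implementation consistent with the assertion above, merging every split into a single "full" split:

```python
from datasets import DatasetDict, concatenate_datasets

def resplit(dataset: DatasetDict) -> DatasetDict:
    # Concatenate all existing splits into one "full" split.
    return DatasetDict(
        {"full": concatenate_datasets(list(dataset.values()))}
    )
```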
@@ -227,7 +227,7 @@ def test_shuffling_applied(self, mock_func: Mock) -> None:
fds = FederatedDataset(
dataset="does-not-matter", partitioners={"train": 10}, shuffle=True, seed=42
)
-train = fds.load_full("train")
+train = fds.load_split("train")
# This should be shuffled
result = train["features"]

@@ -245,7 +245,7 @@ def test_shuffling_not_applied(self, mock_func: Mock) -> None:
partitioners={"train": 10},
shuffle=False,
)
-train = fds.load_full("train")
+train = fds.load_split("train")
# This should not be shuffled
result = train["features"]

@@ -278,7 +278,7 @@ def resplit(dataset: DatasetDict) -> DatasetDict:
resplitter=resplit,
shuffle=True,
)
-train = fds.load_full("train")
+train = fds.load_split("train")
# This should not be shuffled
result = train["features"]

2 changes: 1 addition & 1 deletion doc/source/tutorial-quickstart-xgboost.rst
@@ -884,7 +884,7 @@ After importing all required packages, we define a :code:`main()` function to pe
# Load centralised test set
if args.centralised_eval or args.centralised_eval_client:
log(INFO, "Loading centralised test set...")
-test_data = fds.load_full("test")
+test_data = fds.load_split("test")
test_data.set_format("numpy")
num_test = test_data.shape[0]
test_dmatrix = transform_dataset_to_dmatrix(test_data)
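
`transform_dataset_to_dmatrix` is defined elsewhere in the tutorial; a hedged sketch of what such a helper typically does, with assumed feature/label keys:

```python
import xgboost as xgb

def transform_dataset_to_dmatrix(data):
    # Assumes a NumPy-formatted dataset with an "inputs" feature
    # matrix and a "label" column; adjust the keys to the real schema.
    return xgb.DMatrix(data["inputs"], label=data["label"])
```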
@@ -148,7 +148,7 @@
" partition = partition.train_test_split(train_size=0.8)\n",
" trainloaders.append(DataLoader(partition[\"train\"], batch_size=BATCH_SIZE))\n",
" valloaders.append(DataLoader(partition[\"test\"], batch_size=BATCH_SIZE))\n",
" testset = fds.load_full(\"test\").with_transform(apply_transforms)\n",
" testset = fds.load_split(\"test\").with_transform(apply_transforms)\n",
" testloader = DataLoader(testset, batch_size=BATCH_SIZE)\n",
" return trainloaders, valloaders, testloader\n",
"\n",
2 changes: 1 addition & 1 deletion examples/advanced-pytorch/utils.py
@@ -21,7 +21,7 @@ def load_partition(partition_id, toy: bool = False):

def load_centralized_data():
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
-centralized_data = fds.load_full("test")
+centralized_data = fds.load_split("test")
centralized_data = centralized_data.with_transform(apply_transforms)
return centralized_data

2 changes: 1 addition & 1 deletion examples/advanced-tensorflow/server.py
@@ -47,7 +47,7 @@ def get_evaluate_fn(model):

# Load data here to avoid the overhead of doing it in `evaluate` itself
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
-test = fds.load_full("test")
+test = fds.load_split("test")
test.set_format("numpy")
x_test, y_test = test["img"] / 255.0, test["label"]
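
The remainder of `get_evaluate_fn` falls outside the hunk; a typical server-side evaluation closure for Flower might look like this sketch (the explicit `x_test`/`y_test` parameters are a simplification, not the example's verbatim code):

```python
def get_evaluate_fn(model, x_test, y_test):
    """Return a server-side evaluation function (sketch)."""

    def evaluate(server_round, parameters, config):
        model.set_weights(parameters)  # apply the latest global weights
        loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
        return loss, {"accuracy": accuracy}

    return evaluate
```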

4 changes: 2 additions & 2 deletions examples/custom-metrics/client.py
@@ -17,8 +17,8 @@

# Load data with Flower Datasets (CIFAR-10)
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
-train = fds.load_full("train")
-test = fds.load_full("test")
+train = fds.load_split("train")
+test = fds.load_split("test")

# Using Numpy format
train_np = train.with_format("numpy")
2 changes: 1 addition & 1 deletion examples/embedded-devices/client_pytorch.py
@@ -112,7 +112,7 @@ def apply_transforms(batch):
partition = partition.with_transform(apply_transforms)
trainsets.append(partition["train"])
validsets.append(partition["test"])
-testset = fds.load_full("test")
+testset = fds.load_split("test")
testset = testset.with_transform(apply_transforms)
return trainsets, validsets, testset

2 changes: 1 addition & 1 deletion examples/embedded-devices/client_tf.py
@@ -51,7 +51,7 @@ def prepare_dataset(use_mnist: bool):
)
x_test, y_test = partition["test"][img_key] / 255.0, partition["test"]["label"]
partitions.append(((x_train, y_train), (x_test, y_test)))
-data_centralized = fds.load_full("test")
+data_centralized = fds.load_split("test")
data_centralized.set_format("numpy")
x_centralized = data_centralized[img_key] / 255.0
y_centralized = data_centralized["label"]
2 changes: 1 addition & 1 deletion examples/quickstart-sklearn-tabular/client.py
@@ -28,7 +28,7 @@
dataset = fds.load_partition(partition_id, "train").with_format("pandas")[:]
X = dataset[["petal_length", "petal_width", "sepal_length", "sepal_width"]]
y = dataset["species"]
-unique_labels = fds.load_full("train").unique("species")
+unique_labels = fds.load_split("train").unique("species")
# Split the on edge data: 80% train, 20% test
X_train, X_test = X[: int(0.8 * len(X))], X[int(0.8 * len(X)) :]
y_train, y_test = y[: int(0.8 * len(y))], y[int(0.8 * len(y)) :]
2 changes: 1 addition & 1 deletion examples/simulation-pytorch/sim.ipynb
@@ -197,7 +197,7 @@
"# Download MNIST dataset and partition the \"train\" partition (so one can be assigned to each client)\n",
"mnist_fds = FederatedDataset(dataset=\"mnist\", partitioners={\"train\": NUM_CLIENTS})\n",
"# Let's keep the test set as is, and use it to evaluate the global model on the server\n",
"centralized_testset = mnist_fds.load_full(\"test\")"
"centralized_testset = mnist_fds.load_split(\"test\")"
]
},
{
2 changes: 1 addition & 1 deletion examples/simulation-pytorch/sim.py
@@ -169,7 +169,7 @@ def evaluate(

# Download MNIST dataset and partition it
mnist_fds = FederatedDataset(dataset="mnist", partitioners={"train": NUM_CLIENTS})
-centralized_testset = mnist_fds.load_full("test")
+centralized_testset = mnist_fds.load_split("test")

# Configure the strategy
strategy = fl.server.strategy.FedAvg(
2 changes: 1 addition & 1 deletion examples/simulation-tensorflow/sim.ipynb
@@ -247,7 +247,7 @@
"# Download MNIST dataset and partition it\n",
"mnist_fds = FederatedDataset(dataset=\"mnist\", partitioners={\"train\": NUM_CLIENTS})\n",
"# Get the whole test set for centralised evaluation\n",
"centralized_testset = mnist_fds.load_full(\"test\").to_tf_dataset(\n",
"centralized_testset = mnist_fds.load_split(\"test\").to_tf_dataset(\n",
" columns=\"image\", label_cols=\"label\", batch_size=64\n",
")\n",
"\n",
2 changes: 1 addition & 1 deletion examples/simulation-tensorflow/sim.py
@@ -131,7 +131,7 @@ def evaluate(
# Download MNIST dataset and partition it
mnist_fds = FederatedDataset(dataset="mnist", partitioners={"train": NUM_CLIENTS})
# Get the whole test set for centralised evaluation
-centralized_testset = mnist_fds.load_full("test").to_tf_dataset(
+centralized_testset = mnist_fds.load_split("test").to_tf_dataset(
columns="image", label_cols="label", batch_size=64
)

2 changes: 1 addition & 1 deletion examples/sklearn-logreg-mnist/server.py
@@ -17,7 +17,7 @@ def get_evaluate_fn(model: LogisticRegression):

# Load test data here to avoid the overhead of doing it in `evaluate` itself
fds = FederatedDataset(dataset="mnist", partitioners={"train": 10})
-dataset = fds.load_full("test").with_format("numpy")
+dataset = fds.load_split("test").with_format("numpy")
X_test, y_test = dataset["image"].reshape((len(dataset), -1)), dataset["label"]

# The `evaluate` function will be called after every round
2 changes: 1 addition & 1 deletion examples/vit-finetune/dataset.py
@@ -21,7 +21,7 @@ def get_dataset_with_partitions(num_partitions: int):
dataset="nelorth/oxford-flowers", partitioners={"train": num_partitions}
)

-centralized_testset = ox_flowers_fds.load_full("test")
+centralized_testset = ox_flowers_fds.load_split("test")
return ox_flowers_fds, centralized_testset


2 changes: 1 addition & 1 deletion examples/xgboost-comprehensive/client.py
@@ -43,7 +43,7 @@
if args.centralised_eval:
# Use centralised test set for evaluation
train_data = partition
-valid_data = fds.load_full("test")
+valid_data = fds.load_split("test")
valid_data.set_format("numpy")
num_train = train_data.shape[0]
num_val = valid_data.shape[0]
2 changes: 1 addition & 1 deletion examples/xgboost-comprehensive/server.py
@@ -35,7 +35,7 @@
dataset="jxie/higgs", partitioners={"train": 20}, resplitter=resplit
)
log(INFO, "Loading centralised test set...")
-test_set = fds.load_full("test")
+test_set = fds.load_split("test")
test_set.set_format("numpy")
test_dmatrix = transform_dataset_to_dmatrix(test_set)

2 changes: 1 addition & 1 deletion examples/xgboost-comprehensive/sim.py
@@ -86,7 +86,7 @@ def main():
# Load centralised test set
if args.centralised_eval or args.centralised_eval_client:
log(INFO, "Loading centralised test set...")
-test_data = fds.load_full("test")
+test_data = fds.load_split("test")
test_data.set_format("numpy")
num_test = test_data.shape[0]
test_dmatrix = transform_dataset_to_dmatrix(test_data)
