Skip to content

Commit

Permalink
Merge branch 'main' into cpp-better-comms
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesbvll authored Apr 4, 2024
2 parents a67b1dc + 1336aa9 commit 5abfd6a
Show file tree
Hide file tree
Showing 28 changed files with 36 additions and 36 deletions.
2 changes: 1 addition & 1 deletion datasets/doc/source/how-to-use-with-pytorch.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ expected by a model with a convolutional layer.

If you want to divide the dataset, you can use (at any point before passing the dataset to the DataLoader)::

partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
partition_train = partition_train_test["train"]
partition_test = partition_train_test["test"]

Expand Down
2 changes: 1 addition & 1 deletion datasets/e2e/pytorch/pytorch_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _create_trainloader(self, batch_size: int) -> DataLoader:
partition_id = 0
fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 100})
partition = fds.load_partition(partition_id, "train")
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
partition_train_test = partition_train_test.map(
lambda img: {"img": self.transforms(img)}, input_columns="img"
)
Expand Down
2 changes: 1 addition & 1 deletion datasets/e2e/scikit-learn/sklearn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_partition_data(self):
fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 10})
partition = fds.load_partition(partition_id, "train")
partition.set_format("numpy")
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
X_train, y_train = partition_train_test["train"]["image"], partition_train_test[
"train"]["label"]
X_test, y_test = partition_train_test["test"]["image"], partition_train_test[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
" for partition_id in range(NUM_CLIENTS):\n",
" partition = fds.load_partition(partition_id, \"train\")\n",
" partition = partition.with_transform(apply_transforms)\n",
" partition = partition.train_test_split(train_size=0.8)\n",
" partition = partition.train_test_split(train_size=0.8, seed=42)\n",
" trainloaders.append(DataLoader(partition[\"train\"], batch_size=BATCH_SIZE))\n",
" valloaders.append(DataLoader(partition[\"test\"], batch_size=BATCH_SIZE))\n",
" testset = fds.load_split(\"test\").with_transform(apply_transforms)\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/advanced-pytorch/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def fit(self, parameters, config):
batch_size: int = config["batch_size"]
epochs: int = config["local_epochs"]

train_valid = self.trainset.train_test_split(self.validation_split)
train_valid = self.trainset.train_test_split(self.validation_split, seed=42)
trainset = train_valid["train"]
valset = train_valid["test"]

Expand Down
2 changes: 1 addition & 1 deletion examples/advanced-pytorch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def load_partition(partition_id, toy: bool = False):
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(partition_id)
# Divide data on each node: 80% train, 20% test
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
partition_train_test = partition_train_test.with_transform(apply_transforms)
return partition_train_test["train"], partition_train_test["test"]

Expand Down
2 changes: 1 addition & 1 deletion examples/advanced-tensorflow/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def load_partition(idx: int):
partition.set_format("numpy")

# Divide data on each node: 80% train, 20% test
partition = partition.train_test_split(test_size=0.2)
partition = partition.train_test_split(test_size=0.2, seed=42)
x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"]
x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"]
return x_train, y_train, x_test, y_test
Expand Down
1 change: 0 additions & 1 deletion examples/app-pytorch/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

# Define FlowerClient and client_fn
class FlowerClient(NumPyClient):

def fit(self, parameters, config):
set_weights(net, parameters)
results = train(net, trainloader, testloader, epochs=1, device=DEVICE)
Expand Down
1 change: 0 additions & 1 deletion examples/custom-mods/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ def wandb_mod(msg: Message, context: Context, app: ClientAppCallable) -> Message

# if the `ClientApp` just processed a "fit" message, let's log some metrics to W&B
if reply.metadata.message_type == MessageType.TRAIN and reply.has_content():

metrics = reply.content.configs_records

results_to_log = dict(metrics.get("fitres.metrics", ConfigsRecord()))
Expand Down
2 changes: 1 addition & 1 deletion examples/embedded-devices/client_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def apply_transforms(batch):
for partition_id in range(NUM_CLIENTS):
partition = fds.load_partition(partition_id, "train")
# Divide data on each node: 90% train, 10% test
partition = partition.train_test_split(test_size=0.1)
partition = partition.train_test_split(test_size=0.1, seed=42)
partition = partition.with_transform(apply_transforms)
trainsets.append(partition["train"])
validsets.append(partition["test"])
Expand Down
2 changes: 1 addition & 1 deletion examples/embedded-devices/client_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def prepare_dataset(use_mnist: bool):
partition = fds.load_partition(partition_id, "train")
partition.set_format("numpy")
# Divide data on each node: 90% train, 10% test
partition = partition.train_test_split(test_size=0.1)
partition = partition.train_test_split(test_size=0.1, seed=42)
x_train, y_train = (
partition["train"][img_key] / 255.0,
partition["train"]["label"],
Expand Down
2 changes: 1 addition & 1 deletion examples/fl-dp-sa/fl_dp_sa/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def load_data(partition_id):
fds = FederatedDataset(dataset="mnist", partitioners={"train": 100})
partition = fds.load_partition(partition_id)
# Divide data on each node: 80% train, 20% test
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
pytorch_transforms = Compose([ToTensor(), Normalize((0.5,), (0.5,))])

def apply_transforms(batch):
Expand Down
2 changes: 1 addition & 1 deletion examples/flower-via-docker-compose/helpers/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def load_data(data_sampling_percentage=0.5, client_id=1, total_clients=2):
partition.set_format("numpy")

# Divide data on each client: 80% train, 20% test
partition = partition.train_test_split(test_size=0.2)
partition = partition.train_test_split(test_size=0.2, seed=42)
x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"]
x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"]

Expand Down
2 changes: 1 addition & 1 deletion examples/pytorch-from-centralized-to-federated/cifar.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def load_data(partition_id: int):
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
partition = fds.load_partition(partition_id)
# Divide data on each node: 80% train, 20% test
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
pytorch_transforms = Compose(
[ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)
Expand Down
2 changes: 1 addition & 1 deletion examples/quickstart-huggingface/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def load_data(partition_id):
fds = FederatedDataset(dataset="imdb", partitioners={"train": 1_000})
partition = fds.load_partition(partition_id)
# Divide data: 80% train, 20% test
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)

Expand Down
3 changes: 3 additions & 0 deletions examples/quickstart-mlcube/dev/mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def create_directory(path: str) -> None:

def download(task_args: List[str]) -> None:
"""Task: download.
Input parameters:
--data_dir
"""
Expand Down Expand Up @@ -81,6 +82,7 @@ def download(task_args: List[str]) -> None:

def train(task_args: List[str]) -> None:
"""Task: train.
Input parameters:
--data_dir, --log_dir, --model_dir, --parameters_file
"""
Expand Down Expand Up @@ -175,6 +177,7 @@ def train(task_args: List[str]) -> None:

def evaluate(task_args: List[str]) -> None:
"""Task: train.
Input parameters:
--data_dir, --log_dir, --model_dir, --parameters_file
"""
Expand Down
2 changes: 1 addition & 1 deletion examples/quickstart-mlx/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def evaluate(self, parameters, config):

fds = FederatedDataset(dataset="mnist", partitioners={"train": 3})
partition = fds.load_partition(partition_id=args.partition_id)
partition_splits = partition.train_test_split(test_size=0.2)
partition_splits = partition.train_test_split(test_size=0.2, seed=42)

partition_splits["train"].set_format("numpy")
partition_splits["test"].set_format("numpy")
Expand Down
6 changes: 4 additions & 2 deletions examples/quickstart-pytorch-lightning/mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,11 @@ def load_data(partition):

partition = partition.with_transform(apply_transforms)
# 20 % for on federated evaluation
partition_full = partition.train_test_split(test_size=0.2)
partition_full = partition.train_test_split(test_size=0.2, seed=42)
# 60 % for the federated train and 20 % for the federated validation (both in fit)
partition_train_valid = partition_full["train"].train_test_split(train_size=0.75)
partition_train_valid = partition_full["train"].train_test_split(
train_size=0.75, seed=42
)
trainloader = DataLoader(
partition_train_valid["train"],
batch_size=32,
Expand Down
2 changes: 1 addition & 1 deletion examples/quickstart-pytorch/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def load_data(partition_id):
fds = FederatedDataset(dataset="cifar10", partitioners={"train": 3})
partition = fds.load_partition(partition_id)
# Divide data on each node: 80% train, 20% test
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
pytorch_transforms = Compose(
[ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)
Expand Down
2 changes: 1 addition & 1 deletion examples/quickstart-tensorflow/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
partition.set_format("numpy")

# Divide data on each node: 80% train, 20% test
partition = partition.train_test_split(test_size=0.2)
partition = partition.train_test_split(test_size=0.2, seed=42)
x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"]
x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"]

Expand Down
2 changes: 1 addition & 1 deletion examples/simulation-pytorch/sim.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@
" client_dataset = dataset.load_partition(int(cid), \"train\")\n",
"\n",
" # Now let's split it into train (90%) and validation (10%)\n",
" client_dataset_splits = client_dataset.train_test_split(test_size=0.1)\n",
" client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)\n",
"\n",
" trainset = client_dataset_splits[\"train\"]\n",
" valset = client_dataset_splits[\"test\"]\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/simulation-pytorch/sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def client_fn(cid: str) -> fl.client.Client:
client_dataset = dataset.load_partition(int(cid), "train")

# Now let's split it into train (90%) and validation (10%)
client_dataset_splits = client_dataset.train_test_split(test_size=0.1)
client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)

trainset = client_dataset_splits["train"]
valset = client_dataset_splits["test"]
Expand Down
2 changes: 1 addition & 1 deletion examples/simulation-tensorflow/sim.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
" client_dataset = dataset.load_partition(int(cid), \"train\")\n",
"\n",
" # Now let's split it into train (90%) and validation (10%)\n",
" client_dataset_splits = client_dataset.train_test_split(test_size=0.1)\n",
" client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)\n",
"\n",
" trainset = client_dataset_splits[\"train\"].to_tf_dataset(\n",
" columns=\"image\", label_cols=\"label\", batch_size=32\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/simulation-tensorflow/sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def client_fn(cid: str) -> fl.client.Client:
client_dataset = dataset.load_partition(int(cid), "train")

# Now let's split it into train (90%) and validation (10%)
client_dataset_splits = client_dataset.train_test_split(test_size=0.1)
client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)

trainset = client_dataset_splits["train"].to_tf_dataset(
columns="image", label_cols="label", batch_size=32
Expand Down
2 changes: 0 additions & 2 deletions examples/vit-finetune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@


class FedViTClient(NumPyClient):

def __init__(self, trainset):

self.trainset = trainset
self.model = get_model()

Expand Down
1 change: 0 additions & 1 deletion examples/vit-finetune/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@


def main():

args = parser.parse_args()

# To control the degree of parallelism
Expand Down
16 changes: 8 additions & 8 deletions examples/whisper-federated-finetuning/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ def prepare_silences_dataset(train_dataset, ratio_silence: float = 0.1) -> Datas
"""Generate silences for the train set.
One of the classes in the SpeechCommands dataset is `silence`. However, the dataset
does not include clips of silence. It does however include 5 long files with different
background sounds. The task of this function is to extract several (defined by `ratio_silence`)
one-second long clips from those background audio files. Later, those audio clips will be
included into the training set.
does not include clips of silence. It does however include 5 long files with
different background sounds. The task of this function is to extract several
(defined by `ratio_silence`) one-second long clips from those background audio
files. Later, those audio clips will be included into the training set.
"""
# retrieve original silence audio clips
silences = [d for d in train_dataset if d["label"] == 35]
Expand Down Expand Up @@ -138,9 +138,9 @@ def prepare_silences_dataset(train_dataset, ratio_silence: float = 0.1) -> Datas
def construct_client_mapping(full_trainset, num_clients: int = 100):
"""Create a mapping to partition the dataset into `num_client` buckets.
These buckets contain the same number of `speaker_id` but likely different
number of training examples since each `speaker_id` in SpeechCommands does
provide different amounts of data to the dataset.
These buckets contain the same number of `speaker_id` but likely different number of
training examples since each `speaker_id` in SpeechCommands does provide different
amounts of data to the dataset.
"""
client_ids = list(set(full_trainset["speaker_id"]))
client_ids.remove(
Expand Down Expand Up @@ -191,7 +191,7 @@ def set_params(model: torch.nn.ModuleList, params: List[fl.common.NDArrays]):


def get_model(device, num_classes, compile: bool = True):
"""Create model: Whisper-tiny Encoder + classification head"""
"""Create model: Whisper-tiny Encoder + classification head."""
encoder = WhisperForConditionalGeneration.from_pretrained(
"openai/whisper-tiny"
).get_encoder()
Expand Down
2 changes: 1 addition & 1 deletion src/py/flwr/cli/new/templates/app/code/task.pytorch.py.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def load_data(partition_id, num_partitions):
fds = FederatedDataset(dataset="cifar10", partitioners={"train": num_partitions})
partition = fds.load_partition(partition_id)
# Divide data on each node: 80% train, 20% test
partition_train_test = partition.train_test_split(test_size=0.2)
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
pytorch_transforms = Compose(
[ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)
Expand Down

0 comments on commit 5abfd6a

Please sign in to comment.