Merge branch 'main' into remove-mxnet-examples

adap · Apr 8, 2024 · 95ff8c5 · 95ff8c5
2 parents 7a27c29 + 2f8f9e5
commit 95ff8c5
Show file tree

Hide file tree

Showing 53 changed files with 144 additions and 87 deletions.
diff --git a/.github/workflows/docker-client.yml b/.github/workflows/docker-client.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
     inputs:
       flwr-version:
-        description: "Version of Flower e.g. (1.7.0)."
+        description: "Version of Flower"
         required: true
         type: string
 

diff --git a/.github/workflows/docker-server.yml b/.github/workflows/docker-server.yml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
     inputs:
       flwr-version:
-        description: "Version of Flower e.g. (1.7.0)."
+        description: "Version of Flower"
         required: true
         type: string
       base-image-tag:

diff --git a/baselines/doc/source/conf.py b/baselines/doc/source/conf.py
@@ -37,7 +37,7 @@
 author = "The Flower Authors"
 
 # The full version, including alpha/beta/rc tags
-release = "1.7.0"
+release = "1.8.0"
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/datasets/doc/source/how-to-use-with-pytorch.rst b/datasets/doc/source/how-to-use-with-pytorch.rst
@@ -63,7 +63,7 @@ expected by a model with a convolutional layer.
 
 If you want to divide the dataset, you can use (at any point before passing the dataset to the DataLoader)::
 
-  partition_train_test = partition.train_test_split(test_size=0.2)
+  partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
   partition_train = partition_train_test["train"]
   partition_test = partition_train_test["test"]
 

diff --git a/datasets/e2e/pytorch/pytorch_test.py b/datasets/e2e/pytorch/pytorch_test.py
@@ -65,7 +65,7 @@ def _create_trainloader(self, batch_size: int) -> DataLoader:
         partition_id = 0
         fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 100})
         partition = fds.load_partition(partition_id, "train")
-        partition_train_test = partition.train_test_split(test_size=0.2)
+        partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
         partition_train_test = partition_train_test.map(
             lambda img: {"img": self.transforms(img)}, input_columns="img"
         )

diff --git a/datasets/e2e/scikit-learn/sklearn_test.py b/datasets/e2e/scikit-learn/sklearn_test.py
@@ -29,7 +29,7 @@ def _get_partition_data(self):
         fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 10})
         partition = fds.load_partition(partition_id, "train")
         partition.set_format("numpy")
-        partition_train_test = partition.train_test_split(test_size=0.2)
+        partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
         X_train, y_train = partition_train_test["train"]["image"], partition_train_test[
             "train"]["label"]
         X_test, y_test = partition_train_test["test"]["image"], partition_train_test[

diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py
@@ -133,6 +133,7 @@ def divide_dataset(
     >>> train_test = divide_dataset(dataset=partition, division=division)
     >>> train, test = train_test["train"], train_test["test"]
     """
+    _check_division_config_correctness(division)
     dataset_length = len(dataset)
     ranges = _create_division_indices_ranges(dataset_length, division)
     if isinstance(division, (list, tuple)):
@@ -162,15 +163,15 @@ def _create_division_indices_ranges(
         for fraction in division:
             end_idx += int(dataset_length * fraction)
             ranges.append(range(start_idx, end_idx))
-            start_idx += end_idx
+            start_idx = end_idx
     elif isinstance(division, dict):
         ranges = []
         start_idx = 0
         end_idx = 0
         for fraction in division.values():
             end_idx += int(dataset_length * fraction)
             ranges.append(range(start_idx, end_idx))
-            start_idx += end_idx
+            start_idx = end_idx
     else:
         TypeError(
             f"The type of the `division` should be dict, "
@@ -274,6 +275,7 @@ def concatenate_divisions(
     concatenated_divisions : Dataset
         A dataset created as concatenation of the divisions from all partitions.
     """
+    _check_division_config_correctness(partition_division)
     divisions = []
     zero_len_divisions = 0
     for partition_id in range(partitioner.num_partitions):

diff --git a/datasets/flwr_datasets/utils_test.py b/datasets/flwr_datasets/utils_test.py
@@ -31,13 +31,32 @@
         "expected_concatenation_size",
     ),
     [
+        # Create 1 division
+        ((1.0,), [40], 0, 40),
+        ({"train": 1.0}, [40], "train", 40),
+        # Create 2 divisions
         ((0.8, 0.2), [32, 8], 1, 8),
-        ([0.8, 0.2], [32, 8], 1, 8),
         ({"train": 0.8, "test": 0.2}, [32, 8], "test", 8),
+        # Create 3 divisions
+        ([0.6, 0.2, 0.2], [24, 8, 8], 1, 8),
+        ({"train": 0.6, "valid": 0.2, "test": 0.2}, [24, 8, 8], "test", 8),
+        # Create 4 divisions
+        ([0.4, 0.2, 0.2, 0.2], [16, 8, 8, 8], 1, 8),
+        ({"0": 0.4, "1": 0.2, "2": 0.2, "3": 0.2}, [16, 8, 8, 8], "1", 8),
         # Not full dataset
+        # Create 1 division
+        ([0.8], [32], 0, 32),
+        ({"train": 0.8}, [32], "train", 32),
+        # Create 2 divisions
         ([0.2, 0.1], [8, 4], 1, 4),
         ((0.2, 0.1), [8, 4], 0, 8),
         ({"train": 0.2, "test": 0.1}, [8, 4], "test", 4),
+        # Create 3 divisions
+        ([0.6, 0.2, 0.1], [24, 8, 4], 2, 4),
+        ({"train": 0.6, "valid": 0.2, "test": 0.1}, [24, 8, 4], "test", 4),
+        # Create 4 divisions
+        ([0.4, 0.2, 0.1, 0.2], [16, 8, 4, 8], 2, 4),
+        ({"0": 0.4, "1": 0.2, "2": 0.1, "3": 0.2}, [16, 8, 4, 8], "2", 4),
     ],
 )
 class UtilsTests(unittest.TestCase):
@@ -60,7 +79,7 @@ def test_correct_sizes(self) -> None:
         else:
             lengths = [len(split) for split in divided_dataset.values()]
 
-        self.assertEqual(lengths, self.sizes)
+        self.assertEqual(self.sizes, lengths)
 
     def test_correct_return_types(self) -> None:
         """Test correct types of the divided dataset based on the config."""

diff --git a/dev/update-examples.sh b/dev/update-examples.sh
@@ -16,20 +16,47 @@ echo "---" >> $INDEX
 echo "maxdepth: 1" >> $INDEX
 echo "---" >> $INDEX
 
-rm -f "examples/doc/source/*.md"
+rm -f examples/doc/source/*.md
 
 cd examples/
 for d in $(printf '%s\n' */ | sort -V); do
   example=${d%/}
-  # For each example, copy the README into the source of the Example docs
-  [[ $example != doc ]] && cp $example/README.md $ROOT/examples/doc/source/$example.md 2>&1 >/dev/null
-  # For each example, copy all images of the _static folder into the examples
-  # docs static folder
-  [[ $example != doc ]] && [ -d "$example/_static" ] && {
-    cp $example/_static/**.{jpg,png,jpeg} $ROOT/examples/doc/source/_static/ 2>/dev/null || true
-  }
-  # For each example, insert the name of the example into the index file
-  [[ $example != doc ]] && (echo $INSERT_LINE; echo a; echo $example; echo .; echo wq) | ed $INDEX 2>&1 >/dev/null
+
+  if [[ $example != doc ]]; then
+
+    for file in $example/*.md; do
+      # For each example, copy the README into the source of the Example docs
+      if [[ $(basename "$file") = "README.md" ]]; then
+        cp $file $ROOT/examples/doc/source/$example.md 2>&1 >/dev/null
+      else
+        # If the example contains other markdown files, copy them to the source of the Example docs
+        cp $file $ROOT/examples/doc/source/$(basename "$file") 2>&1 >/dev/null
+      fi
+    done
+
+    gh_text="[<img src=\"_static/view-gh.png\" alt=\"View on GitHub\" width=\"200\"/>](https://github.com/adap/flower/blob/main/examples/$example)"
+    readme_file="$ROOT/examples/doc/source/$example.md"
+
+    if ! grep -Fq "$gh_text" "$readme_file"; then
+      awk -v text="$gh_text" '
+      /^# / && !found {
+        print $0 "\n" text;
+        found=1;
+        next;
+      }
+      { print }
+      ' "$readme_file" > tmpfile && mv tmpfile "$readme_file"
+    fi
+
+    # For each example, copy all images of the _static folder into the examples
+    # docs static folder
+    [ -d "$example/_static" ] && {
+      cp $example/_static/**.{jpg,png,jpeg} $ROOT/examples/doc/source/_static/ 2>/dev/null || true
+    }
+    # For each example, insert the name of the example into the index file
+    (echo $INSERT_LINE; echo a; echo $example; echo .; echo wq) | ed $INDEX 2>&1 >/dev/null
+
+  fi
 done
 
 echo "\`\`\`" >> $INDEX

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -86,7 +86,7 @@
 author = "The Flower Authors"
 
 # The full version, including alpha/beta/rc tags
-release = "1.8.0"
+release = "1.9.0"
 
 # -- General configuration ---------------------------------------------------
 

diff --git a/doc/source/ref-changelog.md b/doc/source/ref-changelog.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## Unreleased
+
+### What's new?
+
+### Incompatible changes
+
+None
+
 ## v1.8.0 (2024-04-03)
 
 ### Thanks to our contributors

diff --git a/doc/source/tutorial-series-get-started-with-flower-pytorch.ipynb b/doc/source/tutorial-series-get-started-with-flower-pytorch.ipynb
@@ -145,7 +145,7 @@
     "    for partition_id in range(NUM_CLIENTS):\n",
     "        partition = fds.load_partition(partition_id, \"train\")\n",
     "        partition = partition.with_transform(apply_transforms)\n",
-    "        partition = partition.train_test_split(train_size=0.8)\n",
+    "        partition = partition.train_test_split(train_size=0.8, seed=42)\n",
     "        trainloaders.append(DataLoader(partition[\"train\"], batch_size=BATCH_SIZE))\n",
     "        valloaders.append(DataLoader(partition[\"test\"], batch_size=BATCH_SIZE))\n",
     "    testset = fds.load_split(\"test\").with_transform(apply_transforms)\n",

diff --git a/examples/advanced-pytorch/client.py b/examples/advanced-pytorch/client.py
@@ -46,7 +46,7 @@ def fit(self, parameters, config):
         batch_size: int = config["batch_size"]
         epochs: int = config["local_epochs"]
 
-        train_valid = self.trainset.train_test_split(self.validation_split)
+        train_valid = self.trainset.train_test_split(self.validation_split, seed=42)
         trainset = train_valid["train"]
         valset = train_valid["test"]
 

diff --git a/examples/advanced-pytorch/utils.py b/examples/advanced-pytorch/utils.py
@@ -14,7 +14,7 @@ def load_partition(partition_id, toy: bool = False):
     fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
     partition = fds.load_partition(partition_id)
     # Divide data on each node: 80% train, 20% test
-    partition_train_test = partition.train_test_split(test_size=0.2)
+    partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
     partition_train_test = partition_train_test.with_transform(apply_transforms)
     return partition_train_test["train"], partition_train_test["test"]
 

diff --git a/examples/advanced-tensorflow/client.py b/examples/advanced-tensorflow/client.py
@@ -123,7 +123,7 @@ def load_partition(idx: int):
     partition.set_format("numpy")
 
     # Divide data on each node: 80% train, 20% test
-    partition = partition.train_test_split(test_size=0.2)
+    partition = partition.train_test_split(test_size=0.2, seed=42)
     x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"]
     x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"]
     return x_train, y_train, x_test, y_test

diff --git a/examples/app-pytorch/client.py b/examples/app-pytorch/client.py
@@ -18,7 +18,6 @@
 
 # Define FlowerClient and client_fn
 class FlowerClient(NumPyClient):
-
     def fit(self, parameters, config):
         set_weights(net, parameters)
         results = train(net, trainloader, testloader, epochs=1, device=DEVICE)

diff --git a/examples/app-pytorch/pyproject.toml b/examples/app-pytorch/pyproject.toml
@@ -11,7 +11,6 @@ authors = ["The Flower Authors <[email protected]>"]
 [tool.poetry.dependencies]
 python = "^3.8"
 # Mandatory dependencies
-flwr = { version = "1.8.0", extras = ["simulation"] }
-flwr-datasets = { version = "0.0.2", extras = ["vision"] }
+flwr = { version = "^1.8.0", extras = ["simulation"] }
 torch = "2.2.1"
 torchvision = "0.17.1"
diff --git a/examples/app-pytorch/requirements.txt b/examples/app-pytorch/requirements.txt
@@ -1,4 +1,3 @@
-flwr[simulation]==1.8.0
-flwr-datasets[vision]==0.0.2
+flwr[simulation]>=1.8.0
 torch==2.2.1
 torchvision==0.17.1
diff --git a/examples/app-secure-aggregation/pyproject.toml b/examples/app-secure-aggregation/pyproject.toml
@@ -11,4 +11,4 @@ authors = ["The Flower Authors <[email protected]>"]
 [tool.poetry.dependencies]
 python = "^3.8"
 # Mandatory dependencies
-flwr = { version = "1.8.0", extras = ["simulation"] }
+flwr = { version = "^1.8.0", extras = ["simulation"] }
diff --git a/examples/app-secure-aggregation/requirements.txt b/examples/app-secure-aggregation/requirements.txt
@@ -1 +1 @@
-flwr[simulation]==1.8.0
+flwr[simulation]>=1.8.0
diff --git a/examples/custom-mods/README.md b/examples/custom-mods/README.md
@@ -288,7 +288,7 @@ $ tree .
 pip install -r requirements.txt
 ```
 
-For [W&B](wandb.ai) you will also need a valid account.
+For [W&B](https://wandb.ai) you will also need a valid account.
 
 ### Start the long-running Flower server (SuperLink)
 
@@ -328,7 +328,7 @@ flower-server-app server:app --insecure
 
 ### Check the results
 
-For W&B, you will need to login to the [website](wandb.ai).
+For W&B, you will need to log in to the [website](https://wandb.ai).
 
 For TensorBoard, you will need to run the following command in your terminal:
 

diff --git a/examples/custom-mods/client.py b/examples/custom-mods/client.py
@@ -86,7 +86,6 @@ def wandb_mod(msg: Message, context: Context, app: ClientAppCallable) -> Message
 
         # if the `ClientApp` just processed a "fit" message, let's log some metrics to W&B
         if reply.metadata.message_type == MessageType.TRAIN and reply.has_content():
-
             metrics = reply.content.configs_records
 
             results_to_log = dict(metrics.get("fitres.metrics", ConfigsRecord()))

diff --git a/examples/doc/source/_static/.gitignore b/examples/doc/source/_static/.gitignore
@@ -3,3 +3,4 @@
 !favicon.ico
 !flower-logo.png
 !tmux_jtop_view.gif
+!view-gh.png
diff --git a/examples/doc/source/_static/view-gh.png b/examples/doc/source/_static/view-gh.png
diff --git a/examples/doc/source/conf.py b/examples/doc/source/conf.py
@@ -30,7 +30,7 @@
 author = "The Flower Authors"
 
 # The full version, including alpha/beta/rc tags
-release = "1.8.0"
+release = "1.9.0"
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/examples/embedded-devices/client_pytorch.py b/examples/embedded-devices/client_pytorch.py
@@ -108,7 +108,7 @@ def apply_transforms(batch):
     for partition_id in range(NUM_CLIENTS):
         partition = fds.load_partition(partition_id, "train")
         # Divide data on each node: 90% train, 10% test
-        partition = partition.train_test_split(test_size=0.1)
+        partition = partition.train_test_split(test_size=0.1, seed=42)
         partition = partition.with_transform(apply_transforms)
         trainsets.append(partition["train"])
         validsets.append(partition["test"])

diff --git a/examples/embedded-devices/client_tf.py b/examples/embedded-devices/client_tf.py
@@ -44,7 +44,7 @@ def prepare_dataset(use_mnist: bool):
         partition = fds.load_partition(partition_id, "train")
         partition.set_format("numpy")
         # Divide data on each node: 90% train, 10% test
-        partition = partition.train_test_split(test_size=0.1)
+        partition = partition.train_test_split(test_size=0.1, seed=42)
         x_train, y_train = (
             partition["train"][img_key] / 255.0,
             partition["train"]["label"],

diff --git a/examples/fl-dp-sa/fl_dp_sa/task.py b/examples/fl-dp-sa/fl_dp_sa/task.py
@@ -42,7 +42,7 @@ def load_data(partition_id):
     fds = FederatedDataset(dataset="mnist", partitioners={"train": 100})
     partition = fds.load_partition(partition_id)
     # Divide data on each node: 80% train, 20% test
-    partition_train_test = partition.train_test_split(test_size=0.2)
+    partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
     pytorch_transforms = Compose([ToTensor(), Normalize((0.5,), (0.5,))])
 
     def apply_transforms(batch):

diff --git a/examples/fl-dp-sa/pyproject.toml b/examples/fl-dp-sa/pyproject.toml
@@ -15,7 +15,7 @@ readme = "README.md"
 [tool.poetry.dependencies]
 python = "^3.9"
 # Mandatory dependencies
-flwr = { version = "1.8.0", extras = ["simulation"] }
+flwr = { version = "^1.8.0", extras = ["simulation"] }
 flwr-datasets = { version = "0.0.2", extras = ["vision"] }
 torch = "2.2.1"
 torchvision = "0.17.1"
diff --git a/examples/fl-dp-sa/requirements.txt b/examples/fl-dp-sa/requirements.txt
@@ -1,4 +1,4 @@
-flwr[simulation]==1.8.0
+flwr[simulation]>=1.8.0
 flwr-datasets[vision]==0.0.2
 torch==2.2.1
 torchvision==0.17.1
diff --git a/examples/flower-via-docker-compose/helpers/load_data.py b/examples/flower-via-docker-compose/helpers/load_data.py
@@ -25,7 +25,7 @@ def load_data(data_sampling_percentage=0.5, client_id=1, total_clients=2):
     partition.set_format("numpy")
 
     # Divide data on each client: 80% train, 20% test
-    partition = partition.train_test_split(test_size=0.2)
+    partition = partition.train_test_split(test_size=0.2, seed=42)
     x_train, y_train = partition["train"]["img"] / 255.0, partition["train"]["label"]
     x_test, y_test = partition["test"]["img"] / 255.0, partition["test"]["label"]
 

diff --git a/examples/flower-via-docker-compose/requirements.txt b/examples/flower-via-docker-compose/requirements.txt
@@ -1,4 +1,4 @@
-flwr==1.7.0
+flwr==1.8.0
 tensorflow==2.13.1
 numpy==1.24.3
 prometheus_client == 0.19.0

diff --git a/examples/pytorch-from-centralized-to-federated/cifar.py b/examples/pytorch-from-centralized-to-federated/cifar.py
@@ -56,7 +56,7 @@ def load_data(partition_id: int):
     fds = FederatedDataset(dataset="cifar10", partitioners={"train": 10})
     partition = fds.load_partition(partition_id)
     # Divide data on each node: 80% train, 20% test
-    partition_train_test = partition.train_test_split(test_size=0.2)
+    partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
     pytorch_transforms = Compose(
         [ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
     )

diff --git a/examples/quickstart-huggingface/client.py b/examples/quickstart-huggingface/client.py
@@ -22,7 +22,7 @@ def load_data(partition_id):
     fds = FederatedDataset(dataset="imdb", partitioners={"train": 1_000})
     partition = fds.load_partition(partition_id)
     # Divide data: 80% train, 20% test
-    partition_train_test = partition.train_test_split(test_size=0.2)
+    partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
 
     tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)