Commit 982d1d4

Merge branch 'main' of https://github.com/adap/flower into cpp-better-comms
charlesbvll committed Apr 4, 2024
2 parents ecf0aef + 9826ad9 commit 982d1d4
Showing 4 changed files with 39 additions and 18 deletions.
6 changes: 4 additions & 2 deletions datasets/flwr_datasets/utils.py
@@ -133,6 +133,7 @@ def divide_dataset(
    >>> train_test = divide_dataset(dataset=partition, division=division)
    >>> train, test = train_test["train"], train_test["test"]
    """
+   _check_division_config_correctness(division)
    dataset_length = len(dataset)
    ranges = _create_division_indices_ranges(dataset_length, division)
    if isinstance(division, (list, tuple)):
@@ -162,15 +163,15 @@ def _create_division_indices_ranges(
        for fraction in division:
            end_idx += int(dataset_length * fraction)
            ranges.append(range(start_idx, end_idx))
-           start_idx += end_idx
+           start_idx = end_idx
    elif isinstance(division, dict):
        ranges = []
        start_idx = 0
        end_idx = 0
        for fraction in division.values():
            end_idx += int(dataset_length * fraction)
            ranges.append(range(start_idx, end_idx))
-           start_idx += end_idx
+           start_idx = end_idx
    else:
        raise TypeError(
            f"The type of the `division` should be dict, "
@@ -274,6 +275,7 @@ def concatenate_divisions(
    concatenated_divisions : Dataset
        A dataset created as concatenation of the divisions from all partitions.
    """
+   _check_division_config_correctness(partition_division)
    divisions = []
    zero_len_divisions = 0
    for partition_id in range(partitioner.num_partitions):
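For context, the change above replaces `start_idx += end_idx` with `start_idx = end_idx`, which matters once there are three or more divisions. A minimal sketch, not part of the diff, using plain Python and a 40-example dataset:

```python
# Reproduce the fixed index arithmetic from _create_division_indices_ranges.
dataset_length = 40
division = [0.6, 0.2, 0.2]

ranges = []
start_idx = 0
end_idx = 0
for fraction in division:
    end_idx += int(dataset_length * fraction)
    ranges.append(range(start_idx, end_idx))
    start_idx = end_idx  # each range starts exactly where the previous ended

print(ranges)  # [range(0, 24), range(24, 32), range(32, 40)]
# With the old `start_idx += end_idx`, start_idx would reach 24 + 32 = 56
# after the second division, making the third range the empty range(56, 40).
```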
23 changes: 21 additions & 2 deletions datasets/flwr_datasets/utils_test.py
@@ -31,13 +31,32 @@
"expected_concatenation_size",
),
[
# Create 1 division
((1.0,), [40], 0, 40),
({"train": 1.0}, [40], "train", 40),
# Create 2 divisions
((0.8, 0.2), [32, 8], 1, 8),
([0.8, 0.2], [32, 8], 1, 8),
({"train": 0.8, "test": 0.2}, [32, 8], "test", 8),
# Create 3 divisions
([0.6, 0.2, 0.2], [24, 8, 8], 1, 8),
({"train": 0.6, "valid": 0.2, "test": 0.2}, [24, 8, 8], "test", 8),
# Create 4 divisions
([0.4, 0.2, 0.2, 0.2], [16, 8, 8, 8], 1, 8),
({"0": 0.4, "1": 0.2, "2": 0.2, "3": 0.2}, [16, 8, 8, 8], "1", 8),
# Not full dataset
# Create 1 division
([0.8], [32], 0, 32),
({"train": 0.8}, [32], "train", 32),
# Create 2 divisions
([0.2, 0.1], [8, 4], 1, 4),
((0.2, 0.1), [8, 4], 0, 8),
({"train": 0.2, "test": 0.1}, [8, 4], "test", 4),
# Create 3 divisions
([0.6, 0.2, 0.1], [24, 8, 4], 2, 4),
({"train": 0.6, "valid": 0.2, "test": 0.1}, [24, 8, 4], "test", 4),
# Create 4 divisions
([0.4, 0.2, 0.1, 0.2], [16, 8, 4, 8], 2, 4),
({"0": 0.4, "1": 0.2, "2": 0.1, "3": 0.2}, [16, 8, 4, 8], "2", 4),
],
)
class UtilsTests(unittest.TestCase):
@@ -60,7 +79,7 @@ def test_correct_sizes(self) -> None:
        else:
            lengths = [len(split) for split in divided_dataset.values()]

-       self.assertEqual(lengths, self.sizes)
+       self.assertEqual(self.sizes, lengths)

    def test_correct_return_types(self) -> None:
        """Test correct types of the divided dataset based on the config."""
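The parametrized cases above pair each `division` config with the expected split sizes for a 40-example dataset. A minimal usage sketch, assuming `divide_dataset` is importable from `flwr_datasets.utils` and the data is a Hugging Face `Dataset`:

```python
from datasets import Dataset
from flwr_datasets.utils import divide_dataset

dataset = Dataset.from_dict({"x": list(range(40))})

# A dict division returns named splits, as in the docstring example above.
train_test = divide_dataset(dataset=dataset, division={"train": 0.8, "test": 0.2})
print(len(train_test["train"]), len(train_test["test"]))  # 32 8

# A list division returns positionally indexed splits.
splits = divide_dataset(dataset=dataset, division=[0.6, 0.2, 0.2])
print([len(s) for s in splits])  # [24, 8, 8]
```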
4 changes: 2 additions & 2 deletions examples/custom-mods/README.md
@@ -288,7 +288,7 @@ $ tree .
pip install -r requirements.txt
```

- For [W&B](wandb.ai) you will also need a valid account.
+ For [W&B](https://wandb.ai) you will also need a valid account.

### Start the long-running Flower server (SuperLink)

@@ -328,7 +328,7 @@ flower-server-app server:app --insecure

### Check the results

- For W&B, you will need to login to the [website](wandb.ai).
+ For W&B, you will need to login to the [website](https://wandb.ai).

For TensorBoard, you will need to run the following command in your terminal:

24 changes: 12 additions & 12 deletions examples/vertical-fl/README.md
@@ -123,7 +123,7 @@ In `task.py`, you'll find the preprocessing functions we'll apply to our data:
'Adult' for ages between 11 and 40, and 'Elderly' for those over 40. If the age
isn't listed, we'll label it as 'Unknown'.

- ```python3
+ ```python
def _bin_age(age_series):
    bins = [-np.inf, 10, 40, np.inf]
    labels = ["Child", "Adult", "Elderly"]
@@ -138,7 +138,7 @@ In `task.py`, you'll find the preprocessing functions we'll apply to our data:
understand social status and family roles, simplifying rare titles into a single
'Rare' category and converting any French titles to their English equivalents.

- ```python3
+ ```python
def _extract_title(name_series):
    titles = name_series.str.extract(" ([A-Za-z]+)\.", expand=False)
    rare_titles = {
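        # Continuation sketch, not part of the diff: the actual entries are
        # elided by the diff view; these are typical rare Titanic titles,
        # listed for illustration only.
        "Capt", "Col", "Don", "Dr", "Jonkheer", "Lady", "Major", "Rev", "Sir",
    }
    # Collapse rare titles and map French titles to their English
    # equivalents, as described in the text above (sketch, assuming
    # pandas' Series.replace).
    titles = titles.replace(list(rare_titles), "Rare")
    titles = titles.replace({"Mlle": "Miss", "Mme": "Mrs"})
    return titles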
@@ -170,7 +170,7 @@ In `task.py`, you'll find the preprocessing functions we'll apply to our data:
'Pclass', 'Embarked', 'Title', 'Cabin', and the binned 'Age' into One-Hot
encodings.

- ```python3
+ ```python
def _create_features(df):
    # Convert 'Age' to numeric, coercing errors to NaN
    df["Age"] = pd.to_numeric(df["Age"], errors="coerce")
@@ -190,7 +190,7 @@ In `task.py`, you'll find the preprocessing functions we'll apply to our data:
In `task.py`, we also partition our data for our 3 clients to mirror real-life
collaborations where different organizations hold different feature sets:

- ```python3
+ ```python
def _partition_data(df, all_keywords):
    partitions = []
    keywords_sets = [{"Parch", "Cabin", "Pclass"}, {"Sex", "Title"}]
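    # Continuation sketch, not part of the diff (an assumption consistent
    # with the 3-client setup, and assuming `all_keywords` is a set): give
    # each keyword set's columns to one client and everything left over to
    # the third.
    keywords_sets.append(all_keywords - keywords_sets[0] - keywords_sets[1])
    for keywords in keywords_sets:
        columns = [c for c in df.columns if any(k in c for k in keywords)]
        partitions.append(df[columns])
    return partitions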
@@ -236,7 +236,7 @@ collective intelligence without sharing sensitive information.

Note that our final data processing function looks like this:

- ```python3
+ ```python
def get_partitions_and_label():
    df = pd.read_csv("_static/data/train.csv")
    processed_df = df.dropna(subset=["Embarked", "Fare"]).copy()
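    # Continuation sketch, not part of the diff (assumptions): run the
    # feature pipeline, split off the "Survived" labels, and partition the
    # remaining features across the three clients.
    processed_df = _create_features(processed_df)
    labels = processed_df["Survived"].values
    features = processed_df.drop(columns=["Survived"])
    partitions = _partition_data(features, set(features.columns))
    return partitions, labels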
@@ -259,7 +259,7 @@ Each client's model is a neural network designed to operate on a distinct subset
of features held by a client. In this example we will use simple linear
regression models.

- ```python3
+ ```python
class ClientModel(nn.Module):
    def __init__(self, input_size):
        super(ClientModel, self).__init__()
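        # Continuation sketch, not part of the diff: a single linear layer,
        # consistent with "simple linear regression models" above and with
        # ServerModel(12) further down (3 clients x 4-dim outputs = 12).
        self.fc = nn.Linear(input_size, 4)

    def forward(self, x):
        return self.fc(x)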
@@ -281,7 +281,7 @@ The server's model acts as the central aggregator in the VFL system. It's also a
neural network but with a slightly different architecture tailored to its role
in aggregating the client models' outputs.

- ```python3
+ ```python
class ServerModel(nn.Module):
    def __init__(self):
        super(ServerModel, self).__init__()
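        # Continuation sketch, not part of the diff (an assumption): map the
        # concatenated 12-dim client outputs to a single survival
        # probability, matching the description below.
        self.fc = nn.Linear(12, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        return self.sigmoid(self.fc(x))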
@@ -305,7 +305,7 @@ a probability score indicative of the likelihood of survival.
The strategy we will write to perform the aggregation will inherit from `FedAvg`
and set the following additional attributes:

- ```python3
+ ```python
self.model = ServerModel(12)
self.initial_parameters = ndarrays_to_parameters(
    [val.cpu().numpy() for _, val in self.model.state_dict().items()]
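)
# Continuation sketch, not part of the diff (assumptions): an optimizer and
# loss for the server model, plus the labels handed to the strategy,
# reshaped to match the (N, 1) model output.
self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01)
self.criterion = nn.BCELoss()
self.label = torch.tensor(labels).float().unsqueeze(1)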
@@ -319,7 +319,7 @@ With `labels` given as an argument to the strategy.

We then redefine the `aggregate_fit` method:

- ```python3
+ ```python
def aggregate_fit(
    self,
    rnd,
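    results,
    failures,
):
    # Continuation sketch, not part of the diff; the body is elided. In a
    # vertical-FL round it would, roughly: collect each client's embedding
    # from `results`, concatenate them, run the server model forward,
    # compute the loss against the stored labels, backpropagate, and return
    # per-client embedding gradients so each client can update locally.
    ...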
@@ -406,7 +406,7 @@ The last thing we have to do is to redefine the `aggregate_evaluate` function to
disable distributed evaluation (as the clients do not hold any labels to test
their local models).

- ```python3
+ ```python
def aggregate_evaluate(
    self,
    rnd,
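    results,
    failures,
):
    # Continuation sketch, not part of the diff: the clients hold no labels,
    # so there is nothing to aggregate server-side and the method can simply
    # report no loss and no metrics (an assumption based on the text above).
    return None, {}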
@@ -420,7 +420,7 @@

Our `FlowerClient` class is going to be quite straightforward.

- ```python3
+ ```python
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, cid, data):
        self.cid = cid
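        # Continuation sketch, not part of the diff (assumptions): each
        # client wraps its own feature partition in a tensor and sizes its
        # ClientModel to that partition; fit() would then return embeddings
        # rather than model weights.
        self.train = torch.tensor(data.values).float()
        self.model = ClientModel(input_size=self.train.shape[1])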
@@ -487,7 +487,7 @@ the `aggregate_evaluate` function of the strategy.
Putting everything together, to start our simulation we use the following
function:

- ```python3
+ ```python
hist = fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=3,
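    # Continuation sketch, not part of the diff: plausible remaining
    # arguments (assumptions) -- a round budget and the custom strategy.
    config=fl.server.ServerConfig(num_rounds=10),
    strategy=strategy,  # an instance of the custom strategy defined above
)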
