-
Notifications
You must be signed in to change notification settings - Fork 941
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into fds-fix-pathological-partitioner
- Loading branch information
Showing
161 changed files
with
27,054 additions
and
15,943 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
name: Translations

on:
  schedule:
    - cron: '0 0 * * *' # Runs every day at midnight (UTC)
  workflow_dispatch: # Allows manually triggering the workflow

jobs:
  update-and-pr:
    runs-on: ubuntu-22.04
    permissions:
      contents: write
      pull-requests: write
    env:
      branch-name: auto-update-trans-text
    name: Update text
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap
        uses: ./.github/actions/bootstrap
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m poetry install
          pip install sphinx==7.3.7

      - name: Install pandoc
        uses: nikeee/setup-pandoc@v1

      - name: Update text and translations for all locales
        run: |
          cd doc
          make update-text
          for langDir in locales/*; do
            if [ -d "$langDir" ]; then
              # Quote expansions so locale dirs with unusual names are safe
              lang=$(basename "$langDir")
              echo "Updating language $lang"
              make update-lang lang="$lang"
            fi
          done

      - name: Commit changes
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add doc/locales
          git commit -m "Update text and language files"
        # `git commit` exits non-zero when there is nothing to commit; that is expected.
        continue-on-error: true

      - name: Calculate diff # Even without doc changes the update-lang command will generate 228 additions and 60 deletions, so we only want to open a PR when there is more
        id: calculate_diff
        run: |
          # `print s+0` (instead of `print s`) guarantees a numeric 0 when the
          # diff is empty; otherwise the outputs would be empty strings and the
          # `>` comparisons in the `if:` expressions below would be malformed.
          additions=$(git diff --numstat HEAD^1 | awk '{s+=$1} END {print s+0}')
          deletions=$(git diff --numstat HEAD^1 | awk '{s+=$2} END {print s+0}')
          echo "Additions: $additions"
          echo "Deletions: $deletions"
          echo "additions=$additions" >> $GITHUB_OUTPUT
          echo "deletions=$deletions" >> $GITHUB_OUTPUT

      - name: Push changes
        if: steps.calculate_diff.outputs.additions > 228 && steps.calculate_diff.outputs.deletions > 60
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: '${{ env.branch-name }}'

      - name: Create Pull Request
        if: steps.calculate_diff.outputs.additions > 228 && steps.calculate_diff.outputs.deletions > 60
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          branch: '${{ env.branch-name }}'
          delete-branch: true
          title: 'docs(framework:skip) Update source texts for translations (automated)'
          body: 'This PR is auto-generated to update text and language files.'
          draft: false
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
# Copyright 2024 Flower Labs GmbH. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
"""SizePartitioner class.""" | ||
|
||
|
||
import warnings | ||
from collections.abc import Sequence | ||
|
||
import datasets | ||
from flwr_datasets.partitioner.partitioner import Partitioner | ||
|
||
|
||
class SizePartitioner(Partitioner):
    """Partitioner that creates each partition with the size specified by a user.

    Parameters
    ----------
    partition_sizes : Sequence[int]
        The size of each partition. partition_id 0 will have partition_sizes[0]
        samples, partition_id 1 will have partition_sizes[1] samples, etc.

    Examples
    --------
    >>> from flwr_datasets import FederatedDataset
    >>> from flwr_datasets.partitioner import SizePartitioner
    >>>
    >>> partition_sizes = [15_000, 5_000, 30_000]
    >>> partitioner = SizePartitioner(partition_sizes)
    >>> fds = FederatedDataset(dataset="cifar10", partitioners={"train": partitioner})
    """

    def __init__(self, partition_sizes: Sequence[int]) -> None:
        super().__init__()
        # Validate everything that does not require the dataset up front;
        # dataset-dependent checks happen lazily on first partition access.
        self._pre_ds_validate_partition_sizes(partition_sizes)
        self._partition_sizes = partition_sizes
        # Lazily computed mapping: partition_id -> dataset row indices.
        self._partition_id_to_indices: dict[int, list[int]] = {}
        self._partition_id_to_indices_determined = False

    def load_partition(self, partition_id: int) -> datasets.Dataset:
        """Load a single partition of the size of partition_sizes[partition_id].

        For example if given partition_sizes=[20_000, 10_000, 30_000],
        then partition_id=0 will return a partition of size 20_000,
        partition_id=1 will return a partition of size 10_000, etc.

        Parameters
        ----------
        partition_id : int
            The index that corresponds to the requested partition.

        Returns
        -------
        dataset_partition : Dataset
            Single dataset partition.
        """
        self._determine_partition_id_to_indices_if_needed()
        return self.dataset.select(self._partition_id_to_indices[partition_id])

    @property
    def num_partitions(self) -> int:
        """Total number of partitions."""
        self._determine_partition_id_to_indices_if_needed()
        return len(self._partition_sizes)

    @property
    def partition_id_to_indices(self) -> dict[int, list[int]]:
        """Partition id to indices (the result of partitioning)."""
        self._determine_partition_id_to_indices_if_needed()
        return self._partition_id_to_indices

    def _determine_partition_id_to_indices_if_needed(self) -> None:
        """Create an assignment of indices to the partition indices."""
        if self._partition_id_to_indices_determined:
            return
        self._post_ds_validate_partition_sizes()
        # Assign contiguous, non-overlapping index ranges in order:
        # partition 0 gets [0, sizes[0]), partition 1 the next sizes[1] rows, etc.
        start = 0
        end = 0
        for partition_id, partition_size in enumerate(self._partition_sizes):
            end += partition_size
            self._partition_id_to_indices[partition_id] = list(range(start, end))
            start = end
        self._partition_id_to_indices_determined = True

    def _pre_ds_validate_partition_sizes(self, partition_sizes: Sequence[int]) -> None:
        """Check if the partition sizes are valid (no information about the dataset).

        Raises
        ------
        ValueError
            If ``partition_sizes`` is not a non-empty sequence of positive ints.
        """
        if not isinstance(partition_sizes, Sequence):
            raise ValueError("Partition sizes must be a sequence.")
        if len(partition_sizes) == 0:
            raise ValueError("Partition sizes must not be empty.")
        if not all(
            isinstance(partition_size, int) for partition_size in partition_sizes
        ):
            raise ValueError("All partition sizes must be integers.")
        if not all(partition_size > 0 for partition_size in partition_sizes):
            raise ValueError("All partition sizes must be greater than zero.")

    def _post_ds_validate_partition_sizes(self) -> None:
        """Validate the partition sizes against the dataset size.

        Raises
        ------
        ValueError
            If the requested sizes sum to more rows than the dataset has.
        """
        desired_partition_sizes = sum(self._partition_sizes)
        dataset_size = len(self.dataset)
        if desired_partition_sizes > dataset_size:
            # NOTE: spaces at f-string fragment boundaries are deliberate; the
            # original message concatenated to "...sum([...])= N is greater...".
            raise ValueError(
                f"The sum of partition sizes sum({self._partition_sizes})"
                f" = {desired_partition_sizes} is greater than the size of"
                f" the dataset {dataset_size}."
            )
        if desired_partition_sizes < dataset_size:
            # Leftover rows are simply never assigned to any partition.
            warnings.warn(
                f"The sum of partition sizes is {desired_partition_sizes}, which"
                f" is smaller than the size of the dataset: {dataset_size}. "
                f"Ignore this warning if it is the desired behavior.",
                stacklevel=1,
            )
Oops, something went wrong.