Merge branch 'main' into improve-changelog-generation
charlesbvll authored Sep 24, 2024
2 parents 70cbba4 + 983217f commit 3cdcb15
Showing 172 changed files with 97,971 additions and 75,464 deletions.
8 changes: 8 additions & 0 deletions .editorconfig
@@ -16,6 +16,14 @@ profile = black
indent_style = space
indent_size = 2

[*.md]
indent_style = space
indent_size = 2

[*.yml]
indent_style = space
indent_size = 2

[*.toml]
indent_style = space
indent_size = 4
2 changes: 1 addition & 1 deletion .github/workflows/datasets-e2e.yml
@@ -45,7 +45,7 @@ jobs:
      - name: Bootstrap
        uses: ./.github/actions/bootstrap
        with:
          python-version: 3.8
          python-version: 3.9
      - name: Install dependencies
        run: python -m poetry install
      - name: Run tests
2 changes: 1 addition & 1 deletion .github/workflows/datasets.yml
@@ -37,7 +37,7 @@ jobs:
        # In case of a mismatch, the job has to download Python to install it.
        # Note: Due to a bug in actions/setup-python, we have to put "3.10" in
        # quotes as it will otherwise assume "3.1"
        python: [3.8, 3.9, '3.10', '3.11']
        python: ['3.9', '3.10', '3.11']

    name: Python ${{ matrix.python }}

79 changes: 79 additions & 0 deletions .github/workflows/update_translations.yml
@@ -0,0 +1,79 @@
name: Translations

on:
  schedule:
    - cron: '0 0 * * *' # Runs every day at midnight
  workflow_dispatch: # Allows to manually trigger the workflow

jobs:
  update-and-pr:
    runs-on: ubuntu-22.04
    permissions:
      contents: write
      pull-requests: write
    env:
      branch-name: auto-update-trans-text
    name: Update text
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap
        uses: ./.github/actions/bootstrap
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m poetry install
          pip install sphinx==7.3.7
      - name: Install pandoc
        uses: nikeee/setup-pandoc@v1

      - name: Update text and translations for all locales
        run: |
          cd doc
          make update-text
          for langDir in locales/*; do
            if [ -d "$langDir" ]; then
              lang=$(basename $langDir)
              echo "Updating language $lang"
              make update-lang lang=$lang
            fi
          done
      - name: Commit changes
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add doc/locales
          git commit -m "Update text and language files"
        continue-on-error: true

      - name: Calculate diff # Even without doc changes the update-lang command will generate 228 additions and 60 deletions, so we only want to open a PR when there is more
        id: calculate_diff
        run: |
          additions=$(git diff --numstat HEAD^1 | awk '{s+=$1} END {print s}')
          deletions=$(git diff --numstat HEAD^1 | awk '{s+=$2} END {print s}')
          echo "Additions: $additions"
          echo "Deletions: $deletions"
          echo "additions=$additions" >> $GITHUB_OUTPUT
          echo "deletions=$deletions" >> $GITHUB_OUTPUT
      - name: Push changes
        if: steps.calculate_diff.outputs.additions > 228 && steps.calculate_diff.outputs.deletions > 60
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: '${{ env.branch-name }}'

      - name: Create Pull Request
        if: steps.calculate_diff.outputs.additions > 228 && steps.calculate_diff.outputs.deletions > 60
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          branch: '${{ env.branch-name }}'
          delete-branch: true
          title: 'docs(framework:skip) Update source texts for translations (automated)'
          body: 'This PR is auto-generated to update text and language files.'
          draft: false
25 changes: 13 additions & 12 deletions benchmarks/flowertune-llm/README.md
@@ -1,4 +1,4 @@
![](_static/flower_llm.png)
[![FlowerTune LLM Leaderboard](_static/flower_llm.png)](https://flower.ai/benchmarks/llm-leaderboard)

# FlowerTune LLM Leaderboard

@@ -13,29 +13,30 @@ As the first step, please register for a Flower account on [flower.ai/login](htt
Then, create a new Python environment and install Flower.

> [!TIP]
> We recommend using `pyenv` and the `virtualenv` plugin to create your environment. Other manager such as Conda would likely work too. Check the [documentation](https://flower.ai/docs/framework/how-to-install-flower.html) for alternative ways of installing Flower.
> We recommend using `pyenv` with the `virtualenv` plugin to create your environment. Other managers, such as Conda, will likely work as well. Check the [documentation](https://flower.ai/docs/framework/how-to-install-flower.html) for alternative ways to install Flower.
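
For instance, if you go the `pyenv` route, creating and activating an environment might look roughly like this (a sketch only; the Python version and environment name below are placeholders):

```shell
# Illustrative sketch only: assumes pyenv and the pyenv-virtualenv plugin are installed
pyenv install 3.10.14                 # install a Python version of your choice
pyenv virtualenv 3.10.14 flowertune   # create a virtual environment from it
pyenv activate flowertune             # activate it before installing Flower
```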
```shell
pip install flwr
```

On the new environment, create a new Flower project using the `FlowerTune` template. You will be prompted for a name to give to your project, your username, and for your choice of LLM challenge:
In the new environment, create a new Flower project using the `FlowerTune` template. You will be prompted for a name to give to your project, your username, and for your choice of LLM challenge:
```shell
flwr new --framework=FlowerTune
```

The `flwr new` command will generate a directory with the following structure:

```bash
<project-name>
├── README.md # <- Instructions
├── pyproject.toml # <- Environment dependencies and configs
└── <project_name>
├── client_app.py # <- Flower ClientApp build
├── dataset.py # <- Dataset and tokenizer build
├── models.py # <- Model build
├── server_app.py # <- Flower ServerApp build
└── strategy.py # <- Flower strategy build
<project_name>
├── README.md # Instructions
├── pyproject.toml # Environment dependencies and configs
└── <project_name>
├── __init__.py
├── client_app.py # Flower ClientApp build
├── dataset.py # Dataset and tokenizer build
├── models.py # Model build
├── server_app.py # Flower ServerApp build
└── strategy.py # Flower strategy build
```

This can serve as the starting point for you to build up your own federated LLM fine-tuning methods.
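
As a rough sketch of what typically comes next (the generated `README.md` holds the authoritative instructions; `flwr run` assumes a reasonably recent Flower CLI):

```shell
# Illustrative only: install the generated project and start a run
cd <project_name>
pip install -e .
flwr run .
```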
5 changes: 4 additions & 1 deletion benchmarks/flowertune-llm/evaluation/README.md
@@ -5,7 +5,7 @@ If you are participating [LLM Leaderboard](https://flower.ai/benchmarks/llm-lead

## How to run

Navigate to the directory corresponding to your selected challenge (`general NLP`, `finance`, `medical`, or `code`) and follow the instructions there to execute the evaluation.
Navigate to the directory corresponding to your selected challenge ([`general NLP`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/general-nlp), [`finance`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/finance), [`medical`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/medical), or [`code`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/code)) and follow the instructions there to execute the evaluation.

> [!NOTE]
> If you wish to participate in the LLM Leaderboard, you must not modify the evaluation code and should use the exact command provided in the respective directory to run the evaluation.
@@ -39,6 +39,9 @@ The default template generated by `flwr new` (see the [Project Creation Instruct
|:----------:|:-----:|:---------:|:--------------:|:---------------:|:-----:|
| Pass@1 (%) | 31.60 | 23.78 | 28.57 | 25.47 | 27.36 |

> [!NOTE]
> In the LLM Leaderboard, we rank the submissions based on the **average** value derived from different evaluation datasets for each challenge.

## Make submission on FlowerTune LLM Leaderboard

10 changes: 5 additions & 5 deletions datasets/flwr_datasets/common/telemetry.py
@@ -25,7 +25,7 @@
from concurrent.futures import Future, ThreadPoolExecutor
from enum import Enum, auto
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, cast
from typing import Any, Optional, Union, cast

from flwr_datasets.common.version import package_name, package_version

@@ -114,7 +114,7 @@ class EventType(str, Enum):
    # The type signature is not compatible with mypy, pylint and flake8
    # so each of those needs to be disabled for this line.
    # pylint: disable-next=no-self-argument,arguments-differ,line-too-long
    def _generate_next_value_(name: str, start: int, count: int, last_values: List[Any]) -> Any: # type: ignore # noqa: E501
    def _generate_next_value_(name: str, start: int, count: int, last_values: list[Any]) -> Any: # type: ignore # noqa: E501
        return name

    PING = auto()
@@ -127,7 +127,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A

# Use the ThreadPoolExecutor with max_workers=1 to have a queue
# and also ensure that telemetry calls are not blocking.
state: Dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
state: dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
    # Will be assigned ThreadPoolExecutor(max_workers=1)
    # in event() the first time it's required
    "executor": None,
@@ -143,7 +143,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A
# pylint: disable-next=unsubscriptable-object
def event(
    event_type: EventType,
    event_details: Optional[Dict[str, Any]] = None,
    event_details: Optional[dict[str, Any]] = None,
) -> Future: # type: ignore
    """Submit create_event to ThreadPoolExecutor to avoid blocking."""
    if state["executor"] is None:
@@ -155,7 +155,7 @@ def event(
    return result


def create_event(event_type: EventType, event_details: Optional[Dict[str, Any]]) -> str:
def create_event(event_type: EventType, event_details: Optional[dict[str, Any]]) -> str:
"""Create telemetry event."""
if state["source"] is None:
state["source"] = _get_source_id()
4 changes: 2 additions & 2 deletions datasets/flwr_datasets/common/typing.py
@@ -15,12 +15,12 @@
"""Flower Datasets type definitions."""


from typing import Any, List
from typing import Any

import numpy as np
import numpy.typing as npt

NDArray = npt.NDArray[Any]
NDArrayInt = npt.NDArray[np.int_]
NDArrayFloat = npt.NDArray[np.float_]
NDArrays = List[NDArray]
NDArrays = list[NDArray]
5 changes: 2 additions & 3 deletions datasets/flwr_datasets/common/version.py
@@ -19,15 +19,14 @@


import importlib.metadata as importlib_metadata
from typing import Tuple


def _check_package(name: str) -> Tuple[str, str]:
def _check_package(name: str) -> tuple[str, str]:
    version: str = importlib_metadata.version(name)
    return name, version


def _version() -> Tuple[str, str]:
def _version() -> tuple[str, str]:
"""Read and return Flower Dataset package name and version.
Returns
8 changes: 4 additions & 4 deletions datasets/flwr_datasets/federated_dataset_test.py
@@ -17,7 +17,7 @@


import unittest
from typing import Dict, Union
from typing import Union
from unittest.mock import Mock, patch

import numpy as np
@@ -385,7 +385,7 @@ def test_dict_of_partitioners_passes_partitioners(self) -> None:
"""Test if partitioners are passed directly (no recreation)."""
num_train_partitions = 100
num_test_partitions = 100
partitioners: Dict[str, Union[Partitioner, int]] = {
partitioners: dict[str, Union[Partitioner, int]] = {
"train": IidPartitioner(num_partitions=num_train_partitions),
"test": IidPartitioner(num_partitions=num_test_partitions),
}
@@ -419,7 +419,7 @@ def test_mixed_type_partitioners_passes_instantiated_partitioners(self) -> None:
"""Test if an instantiated partitioner is passed directly."""
num_train_partitions = 100
num_test_partitions = 100
partitioners: Dict[str, Union[Partitioner, int]] = {
partitioners: dict[str, Union[Partitioner, int]] = {
"train": IidPartitioner(num_partitions=num_train_partitions),
"test": num_test_partitions,
}
@@ -433,7 +433,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None:
"""Test if an IidPartitioner partitioner is created."""
num_train_partitions = 100
num_test_partitions = 100
partitioners: Dict[str, Union[Partitioner, int]] = {
partitioners: dict[str, Union[Partitioner, int]] = {
"train": IidPartitioner(num_partitions=num_train_partitions),
"test": num_test_partitions,
}
6 changes: 3 additions & 3 deletions datasets/flwr_datasets/metrics/utils.py
@@ -16,7 +16,7 @@


import warnings
from typing import List, Optional, Union
from typing import Optional, Union

import pandas as pd

@@ -206,7 +206,7 @@ def compute_frequencies(


def _compute_counts(
    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
) -> pd.Series:
    """Compute the count of labels when taking into account all possible labels.
@@ -237,7 +237,7 @@ def _compute_counts(


def _compute_frequencies(
    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
) -> pd.Series:
    """Compute the distribution of labels when taking into account all possible labels.
