Merge branch 'main' into improve-changelog-generation
charlesbvll authored Sep 24, 2024
2 parents 70cbba4 + 983217f commit 3cdcb15
Showing 172 changed files with 97,971 additions and 75,464 deletions.
8 changes: 8 additions & 0 deletions .editorconfig
@@ -16,6 +16,14 @@ profile = black
indent_style = space
indent_size = 2

[*.md]
indent_style = space
indent_size = 2

[*.yml]
indent_style = space
indent_size = 2

[*.toml]
indent_style = space
indent_size = 4
2 changes: 1 addition & 1 deletion .github/workflows/datasets-e2e.yml
@@ -45,7 +45,7 @@ jobs:
      - name: Bootstrap
        uses: ./.github/actions/bootstrap
        with:
          python-version: 3.8
          python-version: 3.9
      - name: Install dependencies
        run: python -m poetry install
      - name: Run tests
2 changes: 1 addition & 1 deletion .github/workflows/datasets.yml
@@ -37,7 +37,7 @@ jobs:
        # In case of a mismatch, the job has to download Python to install it.
        # Note: Due to a bug in actions/setup-python, we have to put "3.10" in
        # quotes as it will otherwise assume "3.1"
        python: [3.8, 3.9, '3.10', '3.11']
        python: ['3.9', '3.10', '3.11']

    name: Python ${{ matrix.python }}

79 changes: 79 additions & 0 deletions .github/workflows/update_translations.yml
@@ -0,0 +1,79 @@
name: Translations

on:
  schedule:
    - cron: '0 0 * * *' # Runs every day at midnight
  workflow_dispatch: # Allows to manually trigger the workflow

jobs:
  update-and-pr:
    runs-on: ubuntu-22.04
    permissions:
      contents: write
      pull-requests: write
    env:
      branch-name: auto-update-trans-text
    name: Update text
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap
        uses: ./.github/actions/bootstrap
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m poetry install
          pip install sphinx==7.3.7
      - name: Install pandoc
        uses: nikeee/setup-pandoc@v1

      - name: Update text and translations for all locales
        run: |
          cd doc
          make update-text
          for langDir in locales/*; do
            if [ -d "$langDir" ]; then
              lang=$(basename $langDir)
              echo "Updating language $lang"
              make update-lang lang=$lang
            fi
          done
      - name: Commit changes
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add doc/locales
          git commit -m "Update text and language files"
        continue-on-error: true

      - name: Calculate diff # Even without doc changes the update-lang command will generate 228 additions and 60 deletions, so we only want to open a PR when there is more
        id: calculate_diff
        run: |
          additions=$(git diff --numstat HEAD^1 | awk '{s+=$1} END {print s}')
          deletions=$(git diff --numstat HEAD^1 | awk '{s+=$2} END {print s}')
          echo "Additions: $additions"
          echo "Deletions: $deletions"
          echo "additions=$additions" >> $GITHUB_OUTPUT
          echo "deletions=$deletions" >> $GITHUB_OUTPUT
      - name: Push changes
        if: steps.calculate_diff.outputs.additions > 228 && steps.calculate_diff.outputs.deletions > 60
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: '${{ env.branch-name }}'

      - name: Create Pull Request
        if: steps.calculate_diff.outputs.additions > 228 && steps.calculate_diff.outputs.deletions > 60
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          branch: '${{ env.branch-name }}'
          delete-branch: true
          title: 'docs(framework:skip) Update source texts for translations (automated)'
          body: 'This PR is auto-generated to update text and language files.'
          draft: false
25 changes: 13 additions & 12 deletions benchmarks/flowertune-llm/README.md
@@ -1,4 +1,4 @@
![](_static/flower_llm.png)
[![FlowerTune LLM Leaderboard](_static/flower_llm.png)](https://flower.ai/benchmarks/llm-leaderboard)

# FlowerTune LLM Leaderboard

@@ -13,29 +13,30 @@ As the first step, please register for a Flower account on [flower.ai/login](htt
Then, create a new Python environment and install Flower.

> [!TIP]
> We recommend using `pyenv` and the `virtualenv` plugin to create your environment. Other manager such as Conda would likely work too. Check the [documentation](https://flower.ai/docs/framework/how-to-install-flower.html) for alternative ways of installing Flower.
> We recommend using `pyenv` with the `virtualenv` plugin to create your environment. Other managers, such as Conda, will likely work as well. Check the [documentation](https://flower.ai/docs/framework/how-to-install-flower.html) for alternative ways to install Flower.
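
For instance, if you go the `pyenv` route, creating and activating an environment might look roughly like this (a sketch only; the Python version and environment name below are placeholders):

```shell
# Illustrative sketch only: assumes pyenv and the pyenv-virtualenv plugin are installed
pyenv install 3.10.14                 # install a Python version of your choice
pyenv virtualenv 3.10.14 flowertune   # create a virtual environment from it
pyenv activate flowertune             # activate it before installing Flower
```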
```shell
pip install flwr
```

On the new environment, create a new Flower project using the `FlowerTune` template. You will be prompted for a name to give to your project, your username, and for your choice of LLM challenge:
In the new environment, create a new Flower project using the `FlowerTune` template. You will be prompted for a name to give to your project, your username, and for your choice of LLM challenge:
```shell
flwr new --framework=FlowerTune
```

The `flwr new` command will generate a directory with the following structure:

```bash
<project-name>
├── README.md # <- Instructions
├── pyproject.toml # <- Environment dependencies and configs
└── <project_name>
├── client_app.py # <- Flower ClientApp build
├── dataset.py # <- Dataset and tokenizer build
├── models.py # <- Model build
├── server_app.py # <- Flower ServerApp build
└── strategy.py # <- Flower strategy build
<project_name>
├── README.md # Instructions
├── pyproject.toml # Environment dependencies and configs
└── <project_name>
├── __init__.py
├── client_app.py # Flower ClientApp build
├── dataset.py # Dataset and tokenizer build
├── models.py # Model build
├── server_app.py # Flower ServerApp build
└── strategy.py # Flower strategy build
```

This can serve as the starting point for you to build up your own federated LLM fine-tuning methods.
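
As a rough sketch of what typically comes next (the generated `README.md` holds the authoritative instructions; `flwr run` assumes a reasonably recent Flower CLI):

```shell
# Illustrative only: install the generated project and start a run
cd <project_name>
pip install -e .
flwr run .
```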
5 changes: 4 additions & 1 deletion benchmarks/flowertune-llm/evaluation/README.md
@@ -5,7 +5,7 @@ If you are participating [LLM Leaderboard](https://flower.ai/benchmarks/llm-lead

## How to run

Navigate to the directory corresponding to your selected challenge (`general NLP`, `finance`, `medical`, or `code`) and follow the instructions there to execute the evaluation.
Navigate to the directory corresponding to your selected challenge ([`general NLP`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/general-nlp), [`finance`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/finance), [`medical`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/medical), or [`code`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation/code)) and follow the instructions there to execute the evaluation.

> [!NOTE]
> If you wish to participate in the LLM Leaderboard, you must not modify the evaluation code and should use the exact command provided in the respective directory to run the evaluation.
@@ -39,6 +39,9 @@ The default template generated by `flwr new` (see the [Project Creation Instruct
|:----------:|:-----:|:---------:|:--------------:|:---------------:|:-----:|
| Pass@1 (%) | 31.60 | 23.78 | 28.57 | 25.47 | 27.36 |

> [!NOTE]
> In the LLM Leaderboard, we rank the submissions based on the **average** value derived from different evaluation datasets for each challenge.

## Make submission on FlowerTune LLM Leaderboard

10 changes: 5 additions & 5 deletions datasets/flwr_datasets/common/telemetry.py
@@ -25,7 +25,7 @@
from concurrent.futures import Future, ThreadPoolExecutor
from enum import Enum, auto
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, cast
from typing import Any, Optional, Union, cast

from flwr_datasets.common.version import package_name, package_version

@@ -114,7 +114,7 @@ class EventType(str, Enum):
    # The type signature is not compatible with mypy, pylint and flake8
    # so each of those needs to be disabled for this line.
    # pylint: disable-next=no-self-argument,arguments-differ,line-too-long
    def _generate_next_value_(name: str, start: int, count: int, last_values: List[Any]) -> Any: # type: ignore # noqa: E501
    def _generate_next_value_(name: str, start: int, count: int, last_values: list[Any]) -> Any: # type: ignore # noqa: E501
        return name

    PING = auto()
@@ -127,7 +127,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A

# Use the ThreadPoolExecutor with max_workers=1 to have a queue
# and also ensure that telemetry calls are not blocking.
state: Dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
state: dict[str, Union[Optional[str], Optional[ThreadPoolExecutor]]] = {
    # Will be assigned ThreadPoolExecutor(max_workers=1)
    # in event() the first time it's required
    "executor": None,
@@ -143,7 +143,7 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A
# pylint: disable-next=unsubscriptable-object
def event(
    event_type: EventType,
    event_details: Optional[Dict[str, Any]] = None,
    event_details: Optional[dict[str, Any]] = None,
) -> Future: # type: ignore
    """Submit create_event to ThreadPoolExecutor to avoid blocking."""
    if state["executor"] is None:
@@ -155,7 +155,7 @@ def event(
    return result


def create_event(event_type: EventType, event_details: Optional[Dict[str, Any]]) -> str:
def create_event(event_type: EventType, event_details: Optional[dict[str, Any]]) -> str:
"""Create telemetry event."""
if state["source"] is None:
state["source"] = _get_source_id()
4 changes: 2 additions & 2 deletions datasets/flwr_datasets/common/typing.py
@@ -15,12 +15,12 @@
"""Flower Datasets type definitions."""


from typing import Any, List
from typing import Any

import numpy as np
import numpy.typing as npt

NDArray = npt.NDArray[Any]
NDArrayInt = npt.NDArray[np.int_]
NDArrayFloat = npt.NDArray[np.float_]
NDArrays = List[NDArray]
NDArrays = list[NDArray]
5 changes: 2 additions & 3 deletions datasets/flwr_datasets/common/version.py
@@ -19,15 +19,14 @@


import importlib.metadata as importlib_metadata
from typing import Tuple


def _check_package(name: str) -> Tuple[str, str]:
def _check_package(name: str) -> tuple[str, str]:
    version: str = importlib_metadata.version(name)
    return name, version


def _version() -> Tuple[str, str]:
def _version() -> tuple[str, str]:
"""Read and return Flower Dataset package name and version.
Returns
8 changes: 4 additions & 4 deletions datasets/flwr_datasets/federated_dataset_test.py
@@ -17,7 +17,7 @@


import unittest
from typing import Dict, Union
from typing import Union
from unittest.mock import Mock, patch

import numpy as np
@@ -385,7 +385,7 @@ def test_dict_of_partitioners_passes_partitioners(self) -> None:
"""Test if partitioners are passed directly (no recreation)."""
num_train_partitions = 100
num_test_partitions = 100
partitioners: Dict[str, Union[Partitioner, int]] = {
partitioners: dict[str, Union[Partitioner, int]] = {
"train": IidPartitioner(num_partitions=num_train_partitions),
"test": IidPartitioner(num_partitions=num_test_partitions),
}
@@ -419,7 +419,7 @@ def test_mixed_type_partitioners_passes_instantiated_partitioners(self) -> None:
"""Test if an instantiated partitioner is passed directly."""
num_train_partitions = 100
num_test_partitions = 100
partitioners: Dict[str, Union[Partitioner, int]] = {
partitioners: dict[str, Union[Partitioner, int]] = {
"train": IidPartitioner(num_partitions=num_train_partitions),
"test": num_test_partitions,
}
@@ -433,7 +433,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None:
"""Test if an IidPartitioner partitioner is created."""
num_train_partitions = 100
num_test_partitions = 100
partitioners: Dict[str, Union[Partitioner, int]] = {
partitioners: dict[str, Union[Partitioner, int]] = {
"train": IidPartitioner(num_partitions=num_train_partitions),
"test": num_test_partitions,
}
6 changes: 3 additions & 3 deletions datasets/flwr_datasets/metrics/utils.py
@@ -16,7 +16,7 @@


import warnings
from typing import List, Optional, Union
from typing import Optional, Union

import pandas as pd

@@ -206,7 +206,7 @@ def compute_frequencies(


def _compute_counts(
    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
) -> pd.Series:
    """Compute the count of labels when taking into account all possible labels.
@@ -237,7 +237,7 @@ def _compute_counts(


def _compute_frequencies(
    labels: Union[List[int], List[str]], unique_labels: Union[List[int], List[str]]
    labels: Union[list[int], list[str]], unique_labels: Union[list[int], list[str]]
) -> pd.Series:
    """Compute the distribution of labels when taking into account all possible labels.
