diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5270bf89ae33..3e314c8d1de5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,7 +7,10 @@ README.md @jafermarq @tanertopal @danieljanes # Flower Baselines -/baselines @jafermarq @tanertopal @danieljanes +/baselines @jafermarq @danieljanes + +# Flower Benchmarks +/benchmarks @jafermarq @danieljanes # Flower Datasets /datasets @jafermarq @tanertopal @danieljanes @@ -27,3 +30,9 @@ README.md @jafermarq @tanertopal @danieljanes # GitHub Actions and Workflows /.github/workflows @Robert-Steiner @tanertopal @danieljanes /.github/actions @Robert-Steiner @tanertopal @danieljanes + +# Docker-related files +/.devcontainer @Robert-Steiner @Moep90 +**/Dockerfile @Robert-Steiner @Moep90 +**/*.Dockerfile @Robert-Steiner @Moep90 +src/docker @Robert-Steiner @Moep90 diff --git a/.github/workflows/_docker-build.yml b/.github/workflows/_docker-build.yml index a3373c6e93fa..227b0d7482ae 100644 --- a/.github/workflows/_docker-build.yml +++ b/.github/workflows/_docker-build.yml @@ -36,7 +36,7 @@ permissions: jobs: build: name: Build image - runs-on: ubuntu-22.04 + runs-on: ${{ matrix.platform.runner-os }} timeout-minutes: 180 outputs: build-id: ${{ steps.build-id.outputs.id }} @@ -44,10 +44,8 @@ jobs: fail-fast: true matrix: platform: [ - # build-push action and qemu use different platform names - # therefore we create a map - { name: "amd64", qemu: "", docker: "linux/amd64" }, - { name: "arm64", qemu: "arm64", docker: "linux/arm64" }, + { name: "amd64", docker: "linux/amd64", runner-os: "ubuntu-22.04" }, + { name: "arm64", docker: "linux/arm64", runner-os: "ubuntu-4-core-arm64" }, ] steps: - name: Create build id @@ -79,12 +77,6 @@ jobs: print(build_args, file=fh) print("EOF", file=fh) - - name: Set up QEMU - if: matrix.platform.qemu != '' - uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 - with: - platforms: ${{ matrix.platform.qemu }} - - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1 @@ -104,7 +96,7 @@ jobs: uses: Wandalen/wretry.action@6feedb7dedadeb826de0f45ff482b53b379a7844 # v3.5.0 id: build with: - action: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 + action: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0 attempt_limit: 60 # 60 attempts * (9 secs delay + 1 sec retry) = ~10 mins attempt_delay: 9000 # 9 secs with: | @@ -122,7 +114,7 @@ jobs: touch "/tmp/digests/${digest#sha256:}" - name: Upload digest - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: digests-${{ steps.build-id.outputs.id }}-${{ matrix.platform.name }} path: /tmp/digests/* diff --git a/.github/workflows/docker-build-main.yml b/.github/workflows/docker-build-main.yml new file mode 100644 index 000000000000..81ef845eae29 --- /dev/null +++ b/.github/workflows/docker-build-main.yml @@ -0,0 +1,69 @@ +name: Build Docker Images Main Branch + +on: + push: + branches: + - 'main' + +jobs: + parameters: + if: github.repository == 'adap/flower' + name: Collect docker build parameters + runs-on: ubuntu-22.04 + timeout-minutes: 10 + outputs: + pip-version: ${{ steps.versions.outputs.pip-version }} + setuptools-version: ${{ steps.versions.outputs.setuptools-version }} + flwr-version-ref: ${{ steps.versions.outputs.flwr-version-ref }} + steps: + - uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - uses: ./.github/actions/bootstrap + id: bootstrap + + - id: versions + run: | + echo "pip-version=${{ steps.bootstrap.outputs.pip-version }}" >> "$GITHUB_OUTPUT" + echo "setuptools-version=${{ steps.bootstrap.outputs.setuptools-version }}" >> "$GITHUB_OUTPUT" + echo "flwr-version-ref=git+${{ github.server_url }}/${{ github.repository }}.git@${{ github.sha }}" >> "$GITHUB_OUTPUT" + + build-docker-base-images: + name: Build base images + if: github.repository == 'adap/flower' + uses: ./.github/workflows/_docker-build.yml + needs: parameters + with: + namespace-repository: flwr/base + file-dir: src/docker/base/ubuntu + build-args: | + PIP_VERSION=${{ needs.parameters.outputs.pip-version }} + SETUPTOOLS_VERSION=${{ needs.parameters.outputs.setuptools-version }} + FLWR_VERSION_REF=${{ needs.parameters.outputs.flwr-version-ref }} + tags: unstable + secrets: + dockerhub-user: ${{ secrets.DOCKERHUB_USERNAME }} + dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }} + + build-docker-binary-images: + name: Build binary images + if: github.repository == 'adap/flower' + uses: ./.github/workflows/_docker-build.yml + needs: build-docker-base-images + strategy: + fail-fast: false + matrix: + images: [ + { repository: "flwr/superlink", file_dir: "src/docker/superlink" }, + { repository: "flwr/supernode", file_dir: "src/docker/supernode" }, + { repository: "flwr/serverapp", file_dir: "src/docker/serverapp" }, + { repository: "flwr/superexec", file_dir: "src/docker/superexec" }, + { repository: "flwr/clientapp", file_dir: "src/docker/clientapp" } + ] + with: + namespace-repository: ${{ matrix.images.repository }} + file-dir: ${{ matrix.images.file_dir }} + build-args: BASE_IMAGE=unstable + tags: unstable + secrets: + dockerhub-user: ${{ secrets.DOCKERHUB_USERNAME }} + dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/docker-readme.yml b/.github/workflows/docker-readme.yml new file mode 100644 index 000000000000..29dd787d638e --- /dev/null +++ b/.github/workflows/docker-readme.yml @@ -0,0 +1,51 @@ +name: Update Docker READMEs + +on: + push: + branches: + - 'main' + paths: + - 'src/docker/**/README.md' + +jobs: + collect: + if: ${{ github.repository == 'adap/flower' }} + name: Collect Docker READMEs + runs-on: ubuntu-22.04 + timeout-minutes: 10 + outputs: + readme_files: ${{ steps.filter.outputs.readme_files }} + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2 + id: filter + with: + list-files: "json" + filters: | + readme: + - 'src/docker/**/README.md' + + update: + if: ${{ needs.collect.outputs.readme_files != '' && toJson(fromJson(needs.collect.outputs.readme_files)) != '[]' }} + name: Update Docker READMEs + runs-on: ubuntu-22.04 + timeout-minutes: 10 + needs: collect + strategy: + matrix: + readme_path: ${{ fromJSON(needs.collect.outputs.readme_files) }} + + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - id: repository + run: echo "name=$(basename $(dirname ${{ matrix.readme_path }}))" >> "$GITHUB_OUTPUT" + + - name: Docker Hub Description + uses: peter-evans/dockerhub-description@e98e4d1628a5f3be2be7c231e50981aee98723ae # v4.0.0 + with: + repository: flwr/${{ steps.repository.outputs.name }} + readme-filepath: ${{ matrix.readme_path }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} diff --git 
a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 49e5b7bf1b36..815d6422848b 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -146,8 +146,6 @@ jobs: if: ${{ github.repository == 'adap/flower' && !github.event.pull_request.head.repo.fork && github.actor != 'dependabot[bot]' }} run: | python -m pip install https://${{ env.ARTIFACT_BUCKET }}/py/${{ needs.wheel.outputs.dir }}/${{ needs.wheel.outputs.short_sha }}/${{ needs.wheel.outputs.whl_path }} - - name: Install e2e components - run: pip install . - name: Download dataset if: ${{ matrix.dataset }} run: python -c "${{ matrix.dataset }}" @@ -172,7 +170,7 @@ jobs: run: ./../test_superlink.sh bare sqlite - name: Run driver test with client authentication if: ${{ matrix.directory == 'e2e-bare-auth' }} - run: ./../test_superlink.sh bare client-auth + run: ./../test_superlink.sh "${{ matrix.directory }}" client-auth - name: Run reconnection test with SQLite database if: ${{ matrix.directory == 'e2e-bare' }} run: ./../test_reconnection.sh sqlite diff --git a/.github/workflows/framework-release.yml b/.github/workflows/framework-release.yml index 812d5b1e398e..e608329872de 100644 --- a/.github/workflows/framework-release.yml +++ b/.github/workflows/framework-release.yml @@ -16,6 +16,8 @@ jobs: if: ${{ github.repository == 'adap/flower' }} name: Publish release runs-on: ubuntu-22.04 + outputs: + flwr-version: ${{ steps.publish.outputs.flwr-version }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -26,10 +28,12 @@ jobs: uses: ./.github/actions/bootstrap - name: Get artifacts and publish + id: publish env: GITHUB_REF: ${{ github.ref }} run: | TAG_NAME=$(echo "${GITHUB_REF_NAME}" | cut -c2-) + echo "flwr-version=$TAG_NAME" >> "$GITHUB_OUTPUT" wheel_name="flwr-${TAG_NAME}-py3-none-any.whl" tar_name="flwr-${TAG_NAME}.tar.gz" @@ -67,8 +71,7 @@ jobs: - id: matrix run: | - FLWR_VERSION=$(poetry version -s) - python dev/build-docker-image-matrix.py --flwr-version "${FLWR_VERSION}" > matrix.json + python dev/build-docker-image-matrix.py --flwr-version "${{ needs.publish.outputs.flwr-version }}" > matrix.json echo "matrix=$(cat matrix.json)" >> $GITHUB_OUTPUT build-base-images: diff --git a/baselines/README.md b/baselines/README.md index 3a84df02d8de..75bcccb68b2a 100644 --- a/baselines/README.md +++ b/baselines/README.md @@ -1,10 +1,9 @@ # Flower Baselines +> [!NOTE] > We are changing the way we structure the Flower baselines. While we complete the transition to the new format, you can still find the existing baselines in the `flwr_baselines` directory. Currently, you can make use of baselines for [FedAvg](https://github.com/adap/flower/tree/main/baselines/flwr_baselines/flwr_baselines/publications/fedavg_mnist), [FedOpt](https://github.com/adap/flower/tree/main/baselines/flwr_baselines/flwr_baselines/publications/adaptive_federated_optimization), and [LEAF-FEMNIST](https://github.com/adap/flower/tree/main/baselines/flwr_baselines/flwr_baselines/publications/leaf/femnist). -> The documentation below has been updated to reflect the new way of using Flower baselines. - ## Structure @@ -15,17 +14,15 @@ baselines// ├── README.md ├── pyproject.toml └── - ├── *.py # several .py files including main.py and __init__.py - └── conf - └── *.yaml # one or more Hydra config files + └── *.py # several .py files ``` -Please note that some baselines might include additional files (e.g. a `requirements.txt`) or a hierarchy of `.yaml` files for [Hydra](https://hydra.cc/). 
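To make the structure above concrete, here is a minimal sketch of scaffolding a new baseline and inspecting its layout. It assumes `flwr` is installed and that the `flwr new` prompt offers the "Flower Baseline" template (as described in the contributing steps below); the name `fedexample` is just a placeholder:

```bash
# Minimal sketch: scaffold a baseline skeleton and inspect its layout.
# Assumes `flwr` is installed and that `flwr new` offers a
# "Flower Baseline" template; the name `fedexample` is a placeholder.
pip install flwr
cd baselines/
flwr new fedexample   # choose "Flower Baseline" when prompted
ls fedexample         # expect README.md, pyproject.toml and a package of *.py files
```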
## Running the baselines -Each baseline is self-contained in its own directory. Furthermore, each baseline defines its own Python environment using [Poetry](https://python-poetry.org/docs/) via a `pyproject.toml` file and [`pyenv`](https://github.com/pyenv/pyenv). If you haven't setup `Poetry` and `pyenv` already on your machine, please take a look at the [Documentation](https://flower.ai/docs/baselines/how-to-use-baselines.html#setting-up-your-machine) for a guide on how to do so. +> [!NOTE] +> We are in the process of migrating all baselines to use `flwr run`. Those baselines that still use the previous system (i.e. [Poetry](https://python-poetry.org/), [Hydra](https://hydra.cc/) and [start_simulation](https://flower.ai/docs/framework/ref-api/flwr.simulation.start_simulation.html)) might require you to first set up `Poetry` and `pyenv` on your machine. Please take a look at the [Documentation](https://flower.ai/docs/baselines/how-to-use-baselines.html#setting-up-your-machine) for a guide on how to do so. -Assuming `pyenv` and `Poetry` are already installed on your system. Running a baseline can be done by: +Each baseline is self-contained in its own directory. To run a baseline: 1. Clone the flower repository ```bash git clone --depth=1 https://github.com/adap/flower.git && cd flower ``` 2. Navigate inside the directory of the baseline you'd like to run. -3. Follow the `[Environment Setup]` instructions in the `README.md`. In most cases this will require you to just do: - - ```bash - poetry install - ``` +3. Follow the `[Environment Setup]` instructions in the `README.md`. 4. Run the baseline as indicated in the `[Running the Experiments]` section in the `README.md` or in the `[Expected Results]` section to reproduce the experiments in the paper. @@ -46,17 +39,22 @@ Assuming `pyenv` and `Poetry` are already installed on your system. Running a ba Do you have a new federated learning paper and want to add a new baseline to Flower? Or do you want to add an experiment to an existing baseline paper? Great, we really appreciate your contribution! +> [!TIP] +> A more verbose version of these steps can be found in the [Flower Baselines documentation](https://flower.ai/docs/baselines/how-to-contribute-baselines.html). The steps to follow are: +1. Create a new Python 3.10 environment and install Flower (`pip install flwr`). 1. Fork the Flower repo and clone it onto your machine. -2. Navigate to the `baselines/` directory, choose a single-word (and **lowercase**) name for your baseline, and from there run: +2. Navigate to the `baselines/` directory and, with your environment activated, run: ```bash - # This will create a new directory with the same structure as `baseline_template`. - ./dev/create-baseline.sh + # Choose option "Flower Baseline" when prompted + flwr new ``` -3. Then, go inside your baseline directory and continue with the steps detailed in `EXTENDED_README.md` and `README.md`. -4. Once your code is ready and you have checked that following the instructions in your `README.md` the Python environment can be created correctly and that running the code following your instructions can reproduce the experiments in the paper, you just need to create a Pull Request (PR). Then, the process to merge your baseline into the Flower repo will begin! +3. Then, go inside your baseline directory and continue with the steps detailed in the `README.md`. +4.
Once your code is ready, check that you have completed all the sections in the `README.md` and that your baseline still runs when its environment is created from scratch (i.e. play the role of someone trying out the baseline you want to contribute). +5. Create a Pull Request (PR). Then, the process to merge your baseline into the Flower repo will begin! Further resources: diff --git a/baselines/baseline_template/EXTENDED_README.md b/baselines/baseline_template/EXTENDED_README.md deleted file mode 100644 index 9c8f5bc72fa9..000000000000 --- a/baselines/baseline_template/EXTENDED_README.md +++ /dev/null @@ -1,123 +0,0 @@ - -# Extended Readme - -> The baselines are expected to run in a machine running Ubuntu 22.04 - -While `README.md` should include information about the baseline you implement and how to run it, this _extended_ readme provides info on what's the expected directory structure for a new baseline and more generally the instructions to follow before your baseline can be merged into the Flower repository. Please follow closely these instructions. It is likely that you have already completed steps 1-2. - -1. Fork the Flower repository and clone it. -2. Navigate to the `baselines/` directory and from there run: - ```bash - # This will create a new directory with the same structure as this `baseline_template` directory. - ./dev/create-baseline.sh - ``` -3. All your code and configs should go into a sub-directory with the same name as the name of your baseline. - * The sub-directory contains a series of Python scripts that you can edit. Please stick to these files and consult with us if you need additional ones. - * There is also a basic config structure in `/conf` ready be parsed by [Hydra](https://hydra.cc/) when executing your `main.py`. -4. Therefore, the directory structure in your baseline should look like: - ```bash - baselines/ - ├── README.md # describes your baseline and everything needed to use it - ├── EXTENDED_README.md # to remove before creating your PR - ├── pyproject.toml # details your Python environment - └── - ├── *.py # several .py files including main.py and __init__.py - └── conf - └── *.yaml # one or more Hydra config files - - ``` -> :warning: Make sure the variable `name` in `pyproject.toml` is set to the name of the sub-directory containing all your code. - -5. Add your dependencies to the `pyproject.toml` (see below a few examples on how to do it). Read more about Poetry below in this `EXTENDED_README.md`. -6. Regularly check that your coding style and the documentation you add follow good coding practices. To test whether your code meets the requirements, please run the following: - ```bash - # After activating your environment and from your baseline's directory - cd .. # to go to the top-level directory of all baselines - ./dev/test-baseline.sh - ./dev/test-baseline-structure.sh - ``` - Both `test-baseline.sh` and `test-baseline-structure.sh` will also be automatically run when you create a PR, and both tests need to pass for the baseline to be merged. - To automatically solve some formatting issues and apply easy fixes, please run the formatting script: - ```bash - # After activating your environment and from your baseline's directory - cd .. # to go to the top-level directory of all baselines - ./dev/format-baseline.sh - ``` -7. Ensure that the Python environment for your baseline can be created without errors by simply running `poetry install` and that this is properly described later when you complete the `Environment Setup` section in `README.md`.
This is specially important if your environment requires additional steps after doing `poetry install`. -8. Ensure that your baseline runs with default arguments by running `poetry run python -m .main`. Then, describe this and other forms of running your code in the `Running the Experiments` section in `README.md`. -9. Once your code is ready and you have checked: - * that following the instructions in your `README.md` the Python environment can be created correctly - - * that running the code following your instructions can reproduce the experiments in the paper - - , then you just need to create a Pull Request (PR) to kickstart the process of merging your baseline into the Flower repository. - -> Once you are happy to merge your baseline contribution, please delete this `EXTENDED_README.md` file. - - -## About Poetry - -We use Poetry to manage the Python environment for each individual baseline. You can follow the instructions [here](https://python-poetry.org/docs/) to install Poetry in your machine. - - -### Specifying a Python Version (optional) -By default, Poetry will use the Python version in your system. In some settings, you might want to specify a particular version of Python to use inside your Poetry environment. You can do so with [`pyenv`](https://github.com/pyenv/pyenv). Check the documentation for the different ways of installing `pyenv`, but one easy way is using the [automatic installer](https://github.com/pyenv/pyenv-installer): -```bash -curl https://pyenv.run | bash # then, don't forget links to your .bashrc/.zshrc -``` - -You can then install any Python version with `pyenv install ` (e.g. `pyenv install 3.9.17`). Then, in order to use that version for your baseline, you'd do the following: - -```bash -# cd to your baseline directory (i.e. where the `pyproject.toml` is) -pyenv local - -# set that version for poetry -poetry env use - -# then you can install your Poetry environment (see the next setp) -``` - -### Installing Your Environment -With the Poetry tool already installed, you can create an environment for this baseline with commands: -```bash -# run this from the same directory as the `pyproject.toml` file is -poetry install -``` - -This will create a basic Python environment with just Flower and additional packages, including those needed for simulation. Next, you should add the dependencies for your code. It is **critical** that you fix the version of the packages you use using a `=` not a `=^`. You can do so via [`poetry add`](https://python-poetry.org/docs/cli/#add). Below are some examples: - -```bash -# For instance, if you want to install tqdm -poetry add tqdm==4.65.0 - -# If you already have a requirements.txt, you can add all those packages (but ensure you have fixed the version) in one go as follows: -poetry add $( cat requirements.txt ) -``` -With each `poetry add` command, the `pyproject.toml` gets automatically updated so you don't need to keep that `requirements.txt` as part of this baseline. - - -More critically however, is adding your ML framework of choice to the list of dependencies. For some frameworks you might be able to do so with the `poetry add` command. Check [the Poetry documentation](https://python-poetry.org/docs/cli/#add) for how to add packages in various ways. 
For instance, let's say you want to use PyTorch: - -```bash -# with plain `pip` you'd run a command such as: -pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 - -# to add the same 3 dependencies to your Poetry environment you'd need to add the URL to the wheel that the above pip command auto-resolves for you. -# You can find those wheels in `https://download.pytorch.org/whl/cu117`. Copy the link and paste it after the `poetry add` command. -# For instance to add `torch==1.13.1+cu117` and a x86 Linux system with Python3.8 you'd: -poetry add https://download.pytorch.org/whl/cu117/torch-1.13.1%2Bcu117-cp38-cp38-linux_x86_64.whl -# you'll need to repeat this for both `torchvision` and `torchaudio` -``` -The above is just an example of how you can add these dependencies. Please refer to the Poetry documentation to extra reference. - -If all attempts fail, you can still install packages via standard `pip`. You'd first need to source/activate your Poetry environment. -```bash -# first ensure you have created your environment -# and installed the base packages provided in the template -poetry install - -# then activate it -poetry shell -``` -Now you are inside your environment (pretty much as when you use `virtualenv` or `conda`) so you can install further packages with `pip`. Please note that, unlike with `poetry add`, these extra requirements won't be captured by `pyproject.toml`. Therefore, please ensure that you provide all instructions needed to: (1) create the base environment with Poetry and (2) install any additional dependencies via `pip` when you complete your `README.md`. \ No newline at end of file diff --git a/baselines/baseline_template/LICENSE b/baselines/baseline_template/LICENSE deleted file mode 100644 index d64569567334..000000000000 --- a/baselines/baseline_template/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/baselines/baseline_template/README.md b/baselines/baseline_template/README.md deleted file mode 100644 index ee6e1e96976f..000000000000 --- a/baselines/baseline_template/README.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -title: title of the paper -url: URL to the paper page (not the pdf) -labels: [label1, label2] # please add between 4 and 10 single-word (maybe two-words) labels (e.g. system heterogeneity, image classification, asynchronous, weight sharing, cross-silo). Do not use "" -dataset: [dataset1, dataset2] # list of datasets you include in your baseline. Do not use "" ---- - -# :warning: *_Title of your baseline_* - -> Note: If you use this baseline in your work, please remember to cite the original authors of the paper as well as the Flower paper. - -> :warning: This is the template to follow when creating a new Flower Baseline. Please follow the instructions in `EXTENDED_README.md` - -> :warning: Please follow the instructions carefully. 
You can see the [FedProx-MNIST baseline](https://github.com/adap/flower/tree/main/baselines/fedprox) as an example of a baseline that followed this guide. - -> :warning: Please complete the metadata section at the very top of this README. This generates a table at the top of the file that will facilitate indexing baselines. - -**Paper:** :warning: *_add the URL of the paper page (not to the .pdf). For instance if you link a paper on ArXiv, add here the URL to the abstract page (e.g. https://arxiv.org/abs/1512.03385). If your paper is in from a journal or conference proceedings, please follow the same logic._* - -**Authors:** :warning: *_list authors of the paper_* - -**Abstract:** :warning: *_add here the abstract of the paper you are implementing_* - - -## About this baseline - -**What’s implemented:** :warning: *_Concisely describe what experiment(s) in the publication can be replicated by running the code. Please only use a few sentences. Start with: “The code in this directory …”_* - -**Datasets:** :warning: *_List the datasets you used (if you used a medium to large dataset, >10GB please also include the sizes of the dataset)._* - -**Hardware Setup:** :warning: *_Give some details about the hardware (e.g. a server with 8x V100 32GB and 256GB of RAM) you used to run the experiments for this baseline. Someone out there might not have access to the same resources you have so, could list the absolute minimum hardware needed to run the experiment in a reasonable amount of time ? (e.g. minimum is 1x 16GB GPU otherwise a client model can’t be trained with a sufficiently large batch size). Could you test this works too?_* - -**Contributors:** :warning: *_let the world know who contributed to this baseline. This could be either your name, your name and affiliation at the time, or your GitHub profile name if you prefer. If multiple contributors signed up for this baseline, please list yourself and your colleagues_* - - -## Experimental Setup - -**Task:** :warning: *_what’s the primary task that is being federated? (e.g. image classification, next-word prediction). If you have experiments for several, please list them_* - -**Model:** :warning: *_provide details about the model you used in your experiments (if more than use a list). If your model is small, describing it as a table would be :100:. Some FL methods do not use an off-the-shelve model (e.g. ResNet18) instead they create your own. If this is your case, please provide a summary here and give pointers to where in the paper (e.g. Appendix B.4) is detailed._* - -**Dataset:** :warning: *_Earlier you listed already the datasets that your baseline uses. Now you should include a breakdown of the details about each of them. Please include information about: how the dataset is partitioned (e.g. LDA with alpha 0.1 as default and all clients have the same number of training examples; or each client gets assigned a different number of samples following a power-law distribution with each client only instances of 2 classes)? if your dataset is naturally partitioned just state “naturally partitioned”; how many partitions there are (i.e. how many clients)? Please include this an all information relevant about the dataset and its partitioning into a table._* - -**Training Hyperparameters:** :warning: *_Include a table with all the main hyperparameters in your baseline. Please show them with their default value._* - - -## Environment Setup - -:warning: _The Python environment for all baselines should follow these guidelines in the `EXTENDED_README`. 
Specify the steps to create and activate your environment. If there are any external system-wide requirements, please include instructions for them too. These instructions should be comprehensive enough so anyone can run them (if non standard, describe them step-by-step)._ - - -## Running the Experiments - -:warning: _Provide instructions on the steps to follow to run all the experiments._ -```bash -# The main experiment implemented in your baseline using default hyperparameters (that should be setup in the Hydra configs) should run (including dataset download and necessary partitioning) by executing the command: - -poetry run python -m .main # where is the name of this directory and that of the only sub-directory in this directory (i.e. where all your source code is) - -# If you are using a dataset that requires a complicated download (i.e. not using one natively supported by TF/PyTorch) + preprocessing logic, you might want to tell people to run one script first that will do all that. Please ensure the download + preprocessing can be configured to suit (at least!) a different download directory (and use as default the current directory). The expected command to run to do this is: - -poetry run python -m .dataset_preparation - -# It is expected that you baseline supports more than one dataset and different FL settings (e.g. different number of clients, dataset partitioning methods, etc). Please provide a list of commands showing how these experiments are run. Include also a short explanation of what each one does. Here it is expected you'll be using the Hydra syntax to override the default config. - -poetry run python -m .main -. -. -. -poetry run python -m .main -``` - - -## Expected Results - -:warning: _Your baseline implementation should replicate several of the experiments in the original paper. Please include here the exact command(s) needed to run each of those experiments followed by a figure (e.g. a line plot) or table showing the results you obtained when you ran the code. Below is an example of how you can present this. Please add command followed by results for all your experiments._ - -```bash -# it is likely that for one experiment you need to sweep over different hyperparameters. You are encouraged to use Hydra's multirun functionality for this. This is an example of how you could achieve this for some typical FL hyperparameteres - -poetry run python -m .main --multirun num_client_per_round=5,10,50 dataset=femnist,cifar10 -# the above command will run a total of 6 individual experiments (because 3client_configs x 2datasets = 6 -- you can think of it as a grid). - -[Now show a figure/table displaying the results of the above command] - -# add more commands + plots for additional experiments. -``` diff --git a/baselines/baseline_template/baseline_template/__init__.py b/baselines/baseline_template/baseline_template/__init__.py deleted file mode 100644 index a5e567b59135..000000000000 --- a/baselines/baseline_template/baseline_template/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Template baseline package.""" diff --git a/baselines/baseline_template/baseline_template/client.py b/baselines/baseline_template/baseline_template/client.py deleted file mode 100644 index d2e2206111f3..000000000000 --- a/baselines/baseline_template/baseline_template/client.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Define your client class and a function to construct such clients. - -Please overwrite `flwr.client.NumPyClient` or `flwr.client.Client` and create a function -to instantiate your client. 
-""" diff --git a/baselines/baseline_template/baseline_template/conf/base.yaml b/baselines/baseline_template/baseline_template/conf/base.yaml deleted file mode 100644 index 2d65b3b989b2..000000000000 --- a/baselines/baseline_template/baseline_template/conf/base.yaml +++ /dev/null @@ -1,17 +0,0 @@ ---- -# this is the config that will be loaded as default by main.py -# Please follow the provided structure (this will ensuring all baseline follow -# a similar configuration structure and hence be easy to customise) - -dataset: - # dataset config - -model: - # model config - -strategy: - _target_: # points to your strategy (either custom or exiting in Flower) - # rest of strategy config - -client: - # client config diff --git a/baselines/baseline_template/baseline_template/dataset.py b/baselines/baseline_template/baseline_template/dataset.py deleted file mode 100644 index 5e436abe12fb..000000000000 --- a/baselines/baseline_template/baseline_template/dataset.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Handle basic dataset creation. - -In case of PyTorch it should return dataloaders for your dataset (for both the clients -and the server). If you are using a custom dataset class, this module is the place to -define it. If your dataset requires to be downloaded (and this is not done -automatically -- e.g. as it is the case for many dataset in TorchVision) and -partitioned, please include all those functions and logic in the -`dataset_preparation.py` module. You can use all those functions from functions/methods -defined here of course. -""" diff --git a/baselines/baseline_template/baseline_template/dataset_preparation.py b/baselines/baseline_template/baseline_template/dataset_preparation.py deleted file mode 100644 index bd3440b9276b..000000000000 --- a/baselines/baseline_template/baseline_template/dataset_preparation.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Handle the dataset partitioning and (optionally) complex downloads. - -Please add here all the necessary logic to either download, uncompress, pre/post-process -your dataset (or all of the above). If the desired way of running your baseline is to -first download the dataset and partition it and then run the experiments, please -uncomment the lines below and tell us in the README.md (see the "Running the Experiment" -block) that this file should be executed first. -""" -# import hydra -# from hydra.core.hydra_config import HydraConfig -# from hydra.utils import call, instantiate -# from omegaconf import DictConfig, OmegaConf - - -# @hydra.main(config_path="conf", config_name="base", version_base=None) -# def download_and_preprocess(cfg: DictConfig) -> None: -# """Does everything needed to get the dataset. - -# Parameters -# ---------- -# cfg : DictConfig -# An omegaconf object that stores the hydra config. -# """ - -# ## 1. print parsed config -# print(OmegaConf.to_yaml(cfg)) - -# # Please include here all the logic -# # Please use the Hydra config style as much as possible specially -# # for parts that can be customised (e.g. how data is partitioned) - -# if __name__ == "__main__": - -# download_and_preprocess() diff --git a/baselines/baseline_template/baseline_template/main.py b/baselines/baseline_template/baseline_template/main.py deleted file mode 100644 index 25ae1bec6a10..000000000000 --- a/baselines/baseline_template/baseline_template/main.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Create and connect the building blocks for your experiments; start the simulation. 
- -It includes processioning the dataset, instantiate strategy, specify how the global -model is going to be evaluated, etc. At the end, this script saves the results. -""" -# these are the basic packages you'll need here -# feel free to remove some if aren't needed -import hydra -from omegaconf import DictConfig, OmegaConf - - -@hydra.main(config_path="conf", config_name="base", version_base=None) -def main(cfg: DictConfig) -> None: - """Run the baseline. - - Parameters - ---------- - cfg : DictConfig - An omegaconf object that stores the hydra config. - """ - # 1. Print parsed config - print(OmegaConf.to_yaml(cfg)) - - # 2. Prepare your dataset - # here you should call a function in datasets.py that returns whatever is needed to: - # (1) ensure the server can access the dataset used to evaluate your model after - # aggregation - # (2) tell each client what dataset partitions they should use (e.g. a this could - # be a location in the file system, a list of dataloader, a list of ids to extract - # from a dataset, it's up to you) - - # 3. Define your clients - # Define a function that returns another function that will be used during - # simulation to instantiate each individual client - # client_fn = client.() - - # 4. Define your strategy - # pass all relevant argument (including the global dataset used after aggregation, - # if needed by your method.) - # strategy = instantiate(cfg.strategy, ) - - # 5. Start Simulation - # history = fl.simulation.start_simulation() - - # 6. Save your results - # Here you can save the `history` returned by the simulation and include - # also other buffers, statistics, info needed to be saved in order to later - # on generate the plots you provide in the README.md. You can for instance - # access elements that belong to the strategy for example: - # data = strategy.get_my_custom_data() -- assuming you have such method defined. - # Hydra will generate for you a directory each time you run the code. You - # can retrieve the path to that directory with this: - # save_path = HydraConfig.get().runtime.output_dir - - -if __name__ == "__main__": - main() diff --git a/baselines/baseline_template/baseline_template/models.py b/baselines/baseline_template/baseline_template/models.py deleted file mode 100644 index 71fa553d1f59..000000000000 --- a/baselines/baseline_template/baseline_template/models.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Define our models, and training and eval functions. - -If your model is 100% off-the-shelf (e.g. directly from torchvision without requiring -modifications) you might be better off instantiating your model directly from the Hydra -config. In this way, swapping your model for another one can be done without changing -the python code at all -""" diff --git a/baselines/baseline_template/baseline_template/server.py b/baselines/baseline_template/baseline_template/server.py deleted file mode 100644 index 2fd7d42cde5a..000000000000 --- a/baselines/baseline_template/baseline_template/server.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Create global evaluation function. - -Optionally, also define a new Server class (please note this is not needed in most -settings). -""" diff --git a/baselines/baseline_template/baseline_template/strategy.py b/baselines/baseline_template/baseline_template/strategy.py deleted file mode 100644 index 17436c401c30..000000000000 --- a/baselines/baseline_template/baseline_template/strategy.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Optionally define a custom strategy. 
- -Needed only when the strategy is not yet implemented in Flower or because you want to -extend or modify the functionality of an existing strategy. -""" diff --git a/baselines/baseline_template/baseline_template/utils.py b/baselines/baseline_template/baseline_template/utils.py deleted file mode 100644 index 9a831719d623..000000000000 --- a/baselines/baseline_template/baseline_template/utils.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Define any utility function. - -They are not directly relevant to the other (more FL specific) python modules. For -example, you may define here things like: loading a model from a checkpoint, saving -results, plotting. -""" diff --git a/baselines/baseline_template/pyproject.toml b/baselines/baseline_template/pyproject.toml deleted file mode 100644 index 31f1ee7bfe6d..000000000000 --- a/baselines/baseline_template/pyproject.toml +++ /dev/null @@ -1,137 +0,0 @@ -[build-system] -requires = ["poetry-core>=1.4.0"] -build-backend = "poetry.masonry.api" - -[tool.poetry] -name = "" # <----- Ensure it matches the name of your baseline directory containing all the source code -version = "1.0.0" -description = "Flower Baselines" -license = "Apache-2.0" -authors = ["The Flower Authors "] -readme = "README.md" -homepage = "https://flower.ai" -repository = "https://github.com/adap/flower" -documentation = "https://flower.ai" -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Operating System :: MacOS :: MacOS X", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: Implementation :: CPython", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", - "Topic :: Software Development :: Libraries :: Python Modules", - "Typing :: Typed", -] - -[tool.poetry.dependencies] -python = ">=3.8.15, <3.12.0" # don't change this -flwr = { extras = ["simulation"], version = "1.5.0" } -hydra-core = "1.3.2" # don't change this - -[tool.poetry.dev-dependencies] -isort = "==5.13.2" -black = "==24.2.0" -docformatter = "==1.7.5" -mypy = "==1.4.1" -pylint = "==2.8.2" -flake8 = "==3.9.2" -pytest = "==6.2.4" -pytest-watch = "==4.2.0" -ruff = "==0.0.272" -types-requests = "==2.27.7" - -[tool.isort] -line_length = 88 -indent = " " -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -use_parentheses = true - -[tool.black] -line-length = 88 -target-version = ["py38", "py39", "py310", "py311"] - -[tool.pytest.ini_options] -minversion = "6.2" -addopts = "-qq" -testpaths = [ - "flwr_baselines", -] - -[tool.mypy] -ignore_missing_imports = true -strict = false -plugins = "numpy.typing.mypy_plugin" - -[tool.pylint."MESSAGES CONTROL"] -disable = "bad-continuation,duplicate-code,too-few-public-methods,useless-import-alias" -good-names = "i,j,k,_,x,y,X,Y" -signature-mutators = "hydra.main.main" - -[tool.pylint.typecheck] -generated-members = "numpy.*, torch.*, tensorflow.*" - -[[tool.mypy.overrides]] -module = [ - "importlib.metadata.*", - 
"importlib_metadata.*", -] -follow_imports = "skip" -follow_imports_for_stubs = true -disallow_untyped_calls = false - -[[tool.mypy.overrides]] -module = "torch.*" -follow_imports = "skip" -follow_imports_for_stubs = true - -[tool.docformatter] -wrap-summaries = 88 -wrap-descriptions = 88 - -[tool.ruff] -target-version = "py38" -line-length = 88 -select = ["D", "E", "F", "W", "B", "ISC", "C4"] -fixable = ["D", "E", "F", "W", "B", "ISC", "C4"] -ignore = ["B024", "B027"] -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", - "proto", -] - -[tool.ruff.pydocstyle] -convention = "numpy" diff --git a/baselines/dev/create-baseline.sh b/baselines/dev/create-baseline.sh deleted file mode 100755 index 53cd79c569aa..000000000000 --- a/baselines/dev/create-baseline.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# This script duplicates the `baseline_template` directory and changes its name -# to the one you specify when running this script. That name is also used to -# rename the subdirectory inside your new baseline directory as well as to set -# the Python package name that Poetry will build - -set -e -cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/../ - -template="baseline_template" -name=$1 - -# copying directory -echo "Copying '$template' and renaming it to '$name'" -cp -r $template $name - -# renaming sub-directory -echo "Renaming sub-directory as '$name'" -mv $name/$template $name/$name - -# adjusting package name in pyproject.toml -cd $name -if [[ "$OSTYPE" == "darwin"* ]]; then - sed -i '' -e "s//$name/" pyproject.toml -else - sed -i -e "s//$name/" pyproject.toml -fi - -echo "!!! Your directory for your baseline '$name' is ready." diff --git a/baselines/doc/source/conf.py b/baselines/doc/source/conf.py index ecc3482c6fce..974c264a6220 100644 --- a/baselines/doc/source/conf.py +++ b/baselines/doc/source/conf.py @@ -37,7 +37,7 @@ author = "The Flower Authors" # The full version, including alpha/beta/rc tags -release = "1.10.0" +release = "1.11.0" # -- General configuration --------------------------------------------------- diff --git a/baselines/doc/source/how-to-contribute-baselines.rst b/baselines/doc/source/how-to-contribute-baselines.rst index b568e73f1c11..429ac714c1aa 100644 --- a/baselines/doc/source/how-to-contribute-baselines.rst +++ b/baselines/doc/source/how-to-contribute-baselines.rst @@ -6,16 +6,14 @@ Do you have a new federated learning paper and want to add a new baseline to Flo The goal of Flower Baselines is to reproduce experiments from popular papers to accelerate researchers by enabling faster comparisons to new strategies, datasets, models, and federated pipelines in general. Before you start to work on a new baseline or experiment, please check the `Flower Issues `_ or `Flower Pull Requests `_ to see if someone else is already working on it. Please open a new issue if you are planning to work on a new baseline or experiment with a short description of the corresponding paper and the experiment you want to contribute. +If you are proposing a brand new baseline, please indicate what experiments from the paper are planning to include. Requirements ------------ -Contributing a new baseline is really easy. You only have to make sure that your federated learning experiments are running with Flower and replicate the results of a paper. 
Flower baselines need to make use of: +Contributing a new baseline is really easy. You only have to make sure that your federated learning experiments run with Flower, use `Flower Datasets `_, and replicate the results of a paper. +Preferably, the baselines make use of PyTorch, but other ML frameworks are also welcome. The baselines are expected to run on a machine with Ubuntu 22.04, but if yours also runs on macOS, even better! -* `Poetry `_ to manage the Python environment. -* `Hydra `_ to manage the configuration files for your experiments. - -You can find more information about how to setup Poetry in your machine in the ``EXTENDED_README.md`` that is generated when you prepare your baseline. Add a new Flower Baseline ------------------------- @@ -27,11 +25,18 @@ Let's say you want to contribute the code of your most recent Federated Learning #. **Get the Flower source code on your machine** #. Fork the Flower codebase: go to the `Flower GitHub repo `_ and fork the code (click the *Fork* button in the top-right corner and follow the instructions) #. Clone the (forked) Flower source code: :code:`git clone git@github.com:[your_github_username]/flower.git` - #. Open the code in your favorite editor. -#. **Use the provided script to create your baseline directory** - #. Navigate to the baselines directory and run :code:`./dev/create-baseline.sh fedawesome` - #. A new directory in :code:`baselines/fedawesome` is created. - #. Follow the instructions in :code:`EXTENDED_README.md` and :code:`README.md` in your baseline directory. +#. **Create a new baseline using the template** + #. Create a new Python environment with Python 3.10 (we recommend doing this with `pyenv `_) + #. Install Flower with: :code:`pip install flwr`. + #. Navigate to the baselines directory and run: :code:`flwr new fedawesome`. When prompted, choose the option :code:`Flower Baseline`. + #. A new directory in :code:`baselines/fedawesome` is created with the structure needed for a Flower Baseline. + #. Follow the instructions in the :code:`README.md` in your baseline directory. + + .. tip:: + At this point, your baseline contains source code showing how a simple :code:`PyTorch+CIFAR10` project can be built with Flower. + You can run it directly by executing :code:`flwr run .` from inside the directory of your baseline. Update the code as + needed to implement your baseline. + #. **Open a pull request** #. Stage your changes: :code:`git add .` #. Commit & push: :code:`git commit -m "Create new FedAwesome baseline" ; git push` @@ -49,15 +54,18 @@ Further reading: Usability --------- -Flower is known and loved for its usability. Therefore, make sure that your baseline or experiment can be executed with a single command such as: +Flower is known and loved for its usability. Therefore, make sure that your baseline or experiment can be executed with a single command after installing the baseline project: .. code-block:: bash - poetry run python -m .main - - # or, once sourced into your environment - python -m .main + # Install the baseline project + pip install -e . + + # Run the baseline using default config + flwr run . + + # Run the baseline overriding the config + flwr run . --run-config lr=0.01,num-server-rounds=200 -We provide you with a `template-baseline `_ to use as guidance when contributing your baseline. Having all baselines follow a homogenous structure helps users to tryout many baselines without the overheads of having to understand each individual codebase.
Similarly, by using Hydra throughout, users will immediately know how to parameterise your experiments directly from the command line. -We look forward to your contribution! +We look forward to your contribution! \ No newline at end of file diff --git a/baselines/doc/source/how-to-use-baselines.rst b/baselines/doc/source/how-to-use-baselines.rst index 4704a9b6074e..ec65f8f7d5ee 100644 --- a/baselines/doc/source/how-to-use-baselines.rst +++ b/baselines/doc/source/how-to-use-baselines.rst @@ -5,7 +5,6 @@ Use Baselines We are changing the way we structure the Flower baselines. While we complete the transition to the new format, you can still find the existing baselines and use them: `baselines (old) `_. Currently, you can make use of baselines for `FedAvg `_, `FedOpt `_, and `LEAF-FEMNIST `_. - The documentation below has been updated to reflect the new way of using Flower baselines. Structure --------- @@ -15,87 +14,116 @@ All baselines are available in the directory `baselines / + ├── LICENSE ├── README.md - ├── pyproject.toml + ├── pyproject.toml # defines dependencies + ├── _static # optionally a directory to save plots └── - ├── *.py # several .py files including main.py and __init__.py - └── conf - └── *.yaml # one or more Hydra config files - -Please note that some baselines might include additional files (e.g. a :code:`requirements.txt`) or a hierarchy of :code:`.yaml` files for `Hydra `_. Setting up your machine ----------------------- -.. note:: - Flower baselines are designed to run on Ubuntu 22.04. While a GPU is not required to run the baselines, some of the more computationally demanding ones do benefit from GPU acceleration. +.. tip:: + Flower baselines are designed to run on Ubuntu 22.04 and Python 3.10. While a GPU is not required to run the baselines, some of the more computationally demanding ones do benefit from GPU acceleration. + All baselines are expected to make use of `pyenv `_. -Common to all baselines is `Poetry `_, a tool to manage Python dependencies. Baselines also make use of `Pyenv `_. You'll need to install both on your system before running a baseline. What follows is a step-by-step guide on getting :code:`pyenv` and :code:`Poetry` installed on your system. +.. note:: + We are in the process of migrating all baselines to use `flwr run`. Those that haven't yet been migrated still make use of `Poetry `_, a tool to manage Python dependencies. + Identifying whether the baseline you want to run requires Poetry or not is easy: check if the `Environment Setup` section in the baseline's README mentions Poetry. + Follow the instructions later in this section if you need to set up Poetry on your system. -Let's begin by installing :code:`pyenv`. We'll be following the standard procedure. Please refer to the `pyenv docs `_ for alternative ways of installing it. +Let's begin by installing :code:`pyenv`. We'll be following the standard procedure. Please refer to the `pyenv docs `_ for alternative ways of installing it, including for platforms other than Ubuntu. ..
code-block:: bash - # first install a few packages needed later for pyenv - sudo apt install build-essential zlib1g-dev libssl-dev libsqlite3-dev \ - libreadline-dev libbz2-dev libffi-dev liblzma-dev + # first install a few packages needed later for pyenv + sudo apt install build-essential zlib1g-dev libssl-dev libsqlite3-dev \ + libreadline-dev libbz2-dev libffi-dev liblzma-dev - # now clone pyenv into your home directory (this is the default way of installing pyenv) - git clone https://github.com/pyenv/pyenv.git ~/.pyenv + # now clone pyenv into your home directory (this is the default way of installing pyenv) + git clone https://github.com/pyenv/pyenv.git ~/.pyenv - # Then add pyenv to your path by adding the below to your .bashrc/.zshrc - export PYENV_ROOT="$HOME/.pyenv" - command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH" - eval "$(pyenv init -)" + # Then add pyenv to your path by adding the below to your .bashrc/.zshrc + export PYENV_ROOT="$HOME/.pyenv" + command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH" + eval "$(pyenv init -)" Verify your installation by opening a new terminal and .. code-block:: bash - # check python versions available - pyenv versions - # * system (...) # <-- it should just show one + # check python versions available + pyenv versions + # * system (...) # <-- it should just show one + +Then you can proceed and install any version of Python. Baselines use Python 3.10, so we'll be installing a recent version of it. + +.. code-block:: bash + + pyenv install 3.10.14 + # this will take a little while + # once done, you should see that that version is available + pyenv versions + # system + # * 3.10.14 # <-- you just installed this -Then you can proceed and install any version of Python. Most baselines currently use Python 3.10.6, so we'll be installing that one. +Next, let's install the :code:`virtualenv` plugin. Check `the documentation `_ for alternative installation methods. .. code-block:: bash - pyenv install 3.10.6 - # this will take a little while - # once done, you should see that that version is available - pyenv versions - # system - # * 3.10.6 # <-- you just installed this + # Clone `pyenv-virtualenv` + git clone https://github.com/pyenv/pyenv-virtualenv.git $(pyenv root)/plugins/pyenv-virtualenv + + # Restart your shell + exec "$SHELL" + -Now that we have :code:`pyenv` installed, we are ready to install :code:`poetry`. Installing Poetry can be done from a single command: +Using :code:`pyenv` +~~~~~~~~~~~~~~~~~~~ + +Creating a virtual environment can be done as follows: .. code-block:: bash - curl -sSL https://install.python-poetry.org | python3 - + # Create an environment for Python 3.10.14 named test-env + pyenv virtualenv 3.10.14 test-env + + # Then activate it + pyenv activate test-env + + # Deactivate it as follows + pyenv deactivate - # add to path by putting this line at the end of your .zshrc/.bashrc - export PATH="$HOME/.local/bin:$PATH" + +(optional) Setup Poetry +~~~~~~~~~~~~~~~~~~~~~~~ + +Now that we have :code:`pyenv` installed, we are ready to install :code:`poetry`. It can be done from a single command: + +.. code-block:: bash + + curl -sSL https://install.python-poetry.org | python3 - + + # add to path by putting this line at the end of your .zshrc/.bashrc + export PATH="$HOME/.local/bin:$PATH" To install Poetry from source, to customise your installation, or to further integrate Poetry with your shell after installation, please check `the Poetry documentation `_. 
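Putting the pieces above together, here is a minimal sketch of preparing an environment before running a baseline (the environment name ``flower-baselines`` is just an illustrative placeholder):

.. code-block:: bash

    # Create and activate a Python 3.10.14 environment using the virtualenv plugin
    pyenv virtualenv 3.10.14 flower-baselines
    pyenv activate flower-baselines

    # For baselines already migrated to `flwr run`, install the baseline project
    pip install -e .

    # For older baselines that still use Poetry, install the dependencies instead
    poetry install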
+ Using a Flower Baseline ----------------------- -To use Flower Baselines you need first to install :code:`pyenv` and :code:`Poetry`, then: +To use Flower Baselines, you first need to install :code:`pyenv` and, depending on the baseline, also :code:`Poetry`, then: 1. Clone the flower repository .. code-block:: bash - git clone https://github.com/adap/flower.git && cd flower + git clone https://github.com/adap/flower.git && cd flower 2. Navigate inside the directory of the baseline you'd like to run -3. Follow the :code:`[Environment Setup]` instructions in the :code:`README.md`. In most cases this will require you to just do: - -.. code-block:: bash - - poetry install - -4. Run the baseline as indicated in the :code:`[Running the Experiments]` section in the :code:`README.md` or in the `[Expected Results]` section to reproduce the experiments in the paper. +3. Follow the :code:`[Environment Setup]` instructions in the :code:`README.md`. +4. Run the baseline as indicated in the :code:`[Running the Experiments]` section in the :code:`README.md` or in the :code:`[Expected Results]` section to reproduce the experiments in the paper. diff --git a/benchmarks/flowertune-llm/README.md b/benchmarks/flowertune-llm/README.md index 0cb69e7ff9c7..ed2f8821cd88 100644 --- a/benchmarks/flowertune-llm/README.md +++ b/benchmarks/flowertune-llm/README.md @@ -1,4 +1,4 @@ -![](_static/flower_llm.jpg) +![](_static/flower_llm.png) # FlowerTune LLM Leaderboard @@ -9,39 +9,40 @@ Please follow the instructions to run and evaluate the federated LLMs. ## Create a new project -As the first step, please register a Flower account on [Flower website](https://flower.ai/login). -Assuming `flwr` package is already installed on your system (check [here](https://flower.ai/docs/framework/how-to-install-flower.html) for `flwr` installation). -We provide a single-line command to create a new project directory based on your selected challenge: +As the first step, please register for a Flower account on [flower.ai/login](https://flower.ai/login). +Then, create a new Python environment and install Flower. + +> [!TIP] +> We recommend using `pyenv` and the `virtualenv` plugin to create your environment. Other managers such as Conda would likely work too. Check the [documentation](https://flower.ai/docs/framework/how-to-install-flower.html) for alternative ways of installing Flower. ```shell -flwr new --framework=flwrtune --username=your_flower_account +pip install flwr ``` -Then you will see a prompt to ask your project name and the choice of LLM challenges from the set of general NLP, finance, medical and code. -Type your project name and select your preferred challenge, -and then a new project directory will be generated automatically. - -### Structure +In the new environment, create a new Flower project using the `FlowerTune` template.
You will be prompted for a project name, your username, and your choice of LLM challenge: +```shell +flwr new --framework=FlowerTune +``` -After running `flwr new`, you will see a new directory generated with the following structure: +The `flwr new` command will generate a directory with the following structure: ```bash ├── README.md # <- Instructions - ├── pyproject.toml # <- Environment dependencies + ├── pyproject.toml # <- Environment dependencies and configs └── - ├── app.py # <- Flower ClientApp/ServerApp build - ├── client.py # <- Flower client constructor - ├── server.py # <- Sever-related functions - ├── models.py # <- Model build + ├── client_app.py # <- Flower ClientApp build ├── dataset.py # <- Dataset and tokenizer build - ├── conf/config.yaml # <- User configuration - └── conf/static_config.yaml # <- Static configuration + ├── models.py # <- Model build + ├── server_app.py # <- Flower ServerApp build + └── strategy.py # <- Flower strategy build ``` This can serve as the starting point for you to build up your own federated LLM fine-tuning methods. -Please note that any modification to the content of `conf/static_config.yaml` is strictly prohibited for those who wish to participate in the [LLM Leaderboard](https://flower.ai/benchmarks/llm-leaderboard). -Otherwise, the submission will not be considered. + +> [!IMPORTANT] +> Please note that if you intend to submit your project as an entry to the [LLM Leaderboard](https://flower.ai/benchmarks/llm-leaderboard), modifications to the `[tool.flwr.app.config.static]` and `[tool.flwr.federations.local-simulation]` sections in the `pyproject.toml` are not allowed and will invalidate the submission. + ## Run FlowerTune LLM challenges @@ -50,12 +51,17 @@ With a new project directory created, running a baseline challenge can be done b 1. Navigate inside the directory that you just created. -2. Follow the `Environments setup` section of `README.md` in the project directory to install project dependencies. +2. Follow the `Environments setup` section of `README.md` in the project directory to install the project dependencies. 3. Run the challenge as indicated in the `Running the challenge` section in the `README.md`. -## Evaluate pre-trained LLMs +## Evaluate fine-tuned LLMs + +Once the LLM fine-tuning has finished, evaluate the performance of your fine-tuned LLM +following the `README.md` in the [`evaluation`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation) directory. + -After the LLM fine-tuning finished, evaluate the performance of your pre-trained LLMs -following the `README.md` in `evaluation` directory. +> [!NOTE] +> If you have any questions about running FlowerTune LLM challenges or evaluation, please feel free to post in the [Flower Discuss](https://discuss.flower.ai) forum, +or join our [Slack](https://flower.ai/join-slack/) to ask questions in the `#flowertune-llm-leaderboard` channel.
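For orientation, the steps above boil down to a short sequence like the following (a sketch only: the project name `my-flowertune-project` is a hypothetical example, and the install/run commands assume the defaults described in the generated `README.md`):

```shell
# Create a project from the FlowerTune template (interactive prompts follow)
flwr new --framework=FlowerTune

# Enter the generated project directory and install its dependencies
cd my-flowertune-project
pip install -e .

# Run the challenge with the default configuration
flwr run .
```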
diff --git a/benchmarks/flowertune-llm/_static/flower_llm.jpg b/benchmarks/flowertune-llm/_static/flower_llm.jpg deleted file mode 100644 index 96081d9c2ad1..000000000000 Binary files a/benchmarks/flowertune-llm/_static/flower_llm.jpg and /dev/null differ diff --git a/benchmarks/flowertune-llm/_static/flower_llm.png b/benchmarks/flowertune-llm/_static/flower_llm.png new file mode 100644 index 000000000000..e9a0ba3bf30e Binary files /dev/null and b/benchmarks/flowertune-llm/_static/flower_llm.png differ diff --git a/benchmarks/flowertune-llm/evaluation/README.md b/benchmarks/flowertune-llm/evaluation/README.md new file mode 100644 index 000000000000..d7216c089d8a --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/README.md @@ -0,0 +1,46 @@ +# FlowerTune LLM Evaluation + +This directory provides various evaluation metrics to assess the quality of your fine-tuned LLMs. +If you are participating in the [LLM Leaderboard](https://flower.ai/benchmarks/llm-leaderboard), evaluating your fine-tuned LLM is the final step prior to having your submission added to the [LLM Leaderboard](https://flower.ai/benchmarks/llm-leaderboard#how-to-participate). The evaluation scores generated here will be displayed as the definitive values on the LLM Leaderboard. + +## How to run + +Navigate to the directory corresponding to your selected challenge (`general NLP`, `finance`, `medical`, or `code`) and follow the instructions there to execute the evaluation. + +> [!NOTE] +> If you wish to participate in the LLM Leaderboard, you must not modify the evaluation code and should use the exact command provided in the respective directory to run the evaluation. + + +## Baseline results + +The default template generated by `flwr new` (see the [Project Creation Instructions](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm#create-a-new-project)) for each challenge will produce results as follows, which serve as the lower bound on the LLM Leaderboard. + +### General NLP + +| | MT-1 | MT-2 | MT-Avg | +|:--------:|:----:|:----:|:------:| +| MT Score | 5.54 | 5.52 | 5.53 | + +### Finance + +| | FPB | FIQA | TFNS | Avg | +|:-------:|:-----:|:-----:|:-----:|:-----:| +| Acc (%) | 44.55 | 62.50 | 28.77 | 45.27 | + +### Medical + +| | PubMedQA | MedMCQA | MedQA | Avg | +|:-------:|:--------:|:-------:|:-----:|:-----:| +| Acc (%) | 59.00 | 23.69 | 27.10 | 36.60 | + +### Code + +| | MBPP | HumanEval | MultiPL-E (JS) | MultiPL-E (C++) | Avg | +|:----------:|:-----:|:---------:|:--------------:|:---------------:|:-----:| +| Pass@1 (%) | 31.60 | 23.78 | 28.57 | 25.47 | 27.36 | + + +## Make submission on FlowerTune LLM Leaderboard + +If your LLM outperforms the listed benchmarks in any challenge, +we encourage you to submit your code and model to the FlowerTune LLM Leaderboard without hesitation (see the [How-to-participate Instructions](https://flower.ai/benchmarks/llm-leaderboard#how-to-participate)). diff --git a/benchmarks/flowertune-llm/evaluation/code/README.md b/benchmarks/flowertune-llm/evaluation/code/README.md new file mode 100644 index 000000000000..fd63ced2f1e2 --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/code/README.md @@ -0,0 +1,70 @@ +# Evaluation for Code challenge + +We leverage the code generation evaluation metrics provided by [bigcode-evaluation-harness](https://github.com/bigcode-project/bigcode-evaluation-harness/tree/main) to evaluate our fine-tuned LLMs.
+Three datasets have been selected for this evaluation: [MBPP](https://huggingface.co/datasets/google-research-datasets/mbpp) (Python), [HumanEval](https://huggingface.co/datasets/openai/openai_humaneval) (Python), and [MultiPL-E](https://github.com/nuprl/MultiPL-E) (JavaScript, C++). + +> [!WARNING] +> The evaluation process requires ~30 GB of VRAM. On a 40 GB A100, it takes 15-30 minutes to complete, depending on the dataset. + +## Environment Setup + +```shell +git clone --depth=1 https://github.com/adap/flower.git && mv flower/benchmarks/flowertune-llm/evaluation/code ./flowertune-eval-code && rm -rf flower && cd flowertune-eval-code +``` + +Create a new Python environment (we recommend Python 3.10), activate it, then install dependencies with: + +```shell +# From a new python environment, run: +pip install -r requirements.txt + +# Log in HuggingFace account +huggingface-cli login +``` + +After that, install `Node.js` and `g++` for the evaluation of JavaScript and C++: + +```shell +# Install nvm (Node Version Manager) +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash + +# Restart your terminal + +# Download and install Node.js (you may need to restart the terminal) +nvm install 20 + +# Install g++ +sudo apt-get install g++ +``` + +Then, download the `main.py` script from the `bigcode-evaluation-harness` repository: + +```shell +git clone https://github.com/bigcode-project/bigcode-evaluation-harness.git && cd bigcode-evaluation-harness && git checkout 0f3e95f0806e78a4f432056cdb1be93604a51d69 && mv main.py ../ && cd .. && rm -rf bigcode-evaluation-harness +``` + + +## Generate model answers & calculate pass@1 score + +> [!NOTE] +> Evaluation needs to be run on MBPP, HumanEval, MultiPL-E (JS) and MultiPL-E (C++). + +```bash +python main.py \ +--model=mistralai/Mistral-7B-v0.3 \ +--peft_model=/path/to/fine-tuned-peft-model-dir/ \ # e.g., ./peft_1 +--max_length_generation=1024 \ # change to 2048 when running mbpp +--batch_size=4 \ +--use_auth_token \ +--allow_code_execution \ +--save_generations \ +--save_references \ +--tasks=humaneval \ # chosen from [mbpp, humaneval, multiple-js, multiple-cpp] +--metric_output_path=./evaluation_results_humaneval.json # change dataset name based on your choice +``` + +The model answers and pass@1 scores will be saved to `generations_{dataset_name}.json` and `evaluation_results_{dataset_name}.json`, respectively. + + +> [!NOTE] +> Please ensure that you provide all **four pass@1 scores** for the evaluation datasets when submitting to the LLM Leaderboard (see the [`Make Submission`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation#make-submission-on-flowertune-llm-leaderboard) section).
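Since all four scores are needed and the inline-commented command above is annotated for readability rather than copy-pasting, a small sweep over the same flags can help. This is only a convenience sketch: the PEFT path is a placeholder, and `--max_length_generation` is raised to 2048 for `mbpp` as noted above.

```bash
# Run the three tasks that use a 1024-token generation length
for task in humaneval multiple-js multiple-cpp; do
  python main.py \
    --model=mistralai/Mistral-7B-v0.3 \
    --peft_model=/path/to/fine-tuned-peft-model-dir/ \
    --max_length_generation=1024 \
    --batch_size=4 \
    --use_auth_token \
    --allow_code_execution \
    --save_generations \
    --save_references \
    --tasks=$task \
    --metric_output_path=./evaluation_results_$task.json
done

# Run mbpp with the longer generation length
python main.py \
  --model=mistralai/Mistral-7B-v0.3 \
  --peft_model=/path/to/fine-tuned-peft-model-dir/ \
  --max_length_generation=2048 \
  --batch_size=4 \
  --use_auth_token \
  --allow_code_execution \
  --save_generations \
  --save_references \
  --tasks=mbpp \
  --metric_output_path=./evaluation_results_mbpp.json
```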
diff --git a/benchmarks/flowertune-llm/evaluation/code/requirements.txt b/benchmarks/flowertune-llm/evaluation/code/requirements.txt new file mode 100644 index 000000000000..74b5b79d634d --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/code/requirements.txt @@ -0,0 +1,7 @@ +peft==0.6.2 +datasets==2.20.0 +evaluate==0.3.0 +sentencepiece==0.2.0 +protobuf==5.27.1 +bitsandbytes==0.43.1 +git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@0f3e95f0806e78a4f432056cdb1be93604a51d69 diff --git a/benchmarks/flowertune-llm/evaluation/general-nlp/README.md b/benchmarks/flowertune-llm/evaluation/general-nlp/README.md new file mode 100644 index 000000000000..18666968108d --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/general-nlp/README.md @@ -0,0 +1,63 @@ +# Evaluation for General NLP challenge + +We leverage the MT-Bench metric provided by [FastChat](https://github.com/lm-sys/FastChat) to evaluate fine-tuned LLMs. +[MT-Bench](https://arxiv.org/abs/2306.05685) represents a comprehensive suite of multi-turn, open-ended questions designed to evaluate chat assistants. +Strong LLMs, such as GPT-4, serve as judges to assess the quality of responses provided by the chat assistants under examination. + +## Environment Setup + +```shell +git clone --depth=1 https://github.com/adap/flower.git && mv flower/benchmarks/flowertune-llm/evaluation/general-nlp ./flowertune-eval-general-nlp && rm -rf flower && cd flowertune-eval-general-nlp +``` + +Create a new Python environment (we recommend Python 3.10), activate it, then install dependencies with: + +```shell +# From a new python environment, run: +pip install -r requirements.txt + +# Log in HuggingFace account +huggingface-cli login +``` + +Download data from [FastChat](https://github.com/lm-sys/FastChat): + +```shell +git clone https://github.com/lm-sys/FastChat.git && cd FastChat && git checkout d561f87b24de197e25e3ddf7e09af93ced8dfe36 && mv fastchat/llm_judge/data ../data && cd .. && rm -rf FastChat +``` + + +## Generate model answers from MT-bench questions + +```bash +python gen_model_answer.py --peft-path=/path/to/fine-tuned-peft-model-dir/ # e.g., ./peft_1 +``` +The answers will be saved to `data/mt_bench/model_answer/[base_model_name].jsonl` by default. + + +## Generate judgments using GPT-4 + +Please follow these [instructions](https://platform.openai.com/docs/quickstart/developer-quickstart) to create an OpenAI API key. +The estimated cost of running this evaluation is approximately USD 10. + +> [!NOTE] +> If you changed the base model of your LLM project, specify it in the command below via `--model-list`. + +```bash +export OPENAI_API_KEY=XXXXXX # set the OpenAI API key +python gen_judgement.py --model-list Mistral-7B-v0.3 +``` + +The judgments will be saved to `data/mt_bench/model_judgment/gpt-4_single.jsonl` by default. + + +## Show MT-bench scores + +```bash +python show_result.py --model-list Mistral-7B-v0.3 +``` +GPT-4 will give a score out of 10 to the first turn (MT-1) and second turn (MT-2) of the conversations, along with an average value as the third score. + +> [!NOTE] +> Please ensure that you provide all **three scores** when submitting to the LLM Leaderboard (see the [`Make Submission`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation#make-submission-on-flowertune-llm-leaderboard) section).
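Putting the three stages together, an evaluation session looks roughly like this (a sketch of the commands above; the PEFT path is a placeholder, and `Mistral-7B-v0.3` assumes you kept the default base model):

```bash
# 1. Generate answers to the MT-Bench questions with your fine-tuned PEFT model
python gen_model_answer.py --peft-path=/path/to/fine-tuned-peft-model-dir/

# 2. Judge the answers with GPT-4 (requires an OpenAI API key)
export OPENAI_API_KEY=XXXXXX
python gen_judgement.py --model-list Mistral-7B-v0.3

# 3. Display the MT-1, MT-2, and average scores
python show_result.py --model-list Mistral-7B-v0.3
```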
+ diff --git a/benchmarks/flowertune-llm/evaluation/general-nlp/gen_judgement.py b/benchmarks/flowertune-llm/evaluation/general-nlp/gen_judgement.py new file mode 100644 index 000000000000..14ad3c7c6544 --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/general-nlp/gen_judgement.py @@ -0,0 +1,130 @@ +""" +This python file is adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/llm_judge/gen_judgment.py + +FastChat (https://github.com/lm-sys/FastChat) is licensed under the Apache License, Version 2.0. + +Citation: +@misc{zheng2023judging, + title={Judging LLM-as-a-judge with MT-Bench and Chatbot Arena}, + author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu + and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric. P Xing and Hao Zhang + and Joseph E. Gonzalez and Ion Stoica}, + year={2023}, + eprint={2306.05685}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +""" + +import argparse +import json + +from fastchat.llm_judge.common import ( + NEED_REF_CATS, + check_data, + get_model_list, + load_judge_prompts, + load_model_answers, + load_questions, + play_a_match_single, +) +from fastchat.llm_judge.gen_judgment import make_judge_single, make_match_single +from tqdm import tqdm + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--judge-file", + type=str, + default="data/judge_prompts.jsonl", + help="The file of judge prompts.", + ) + parser.add_argument("--judge-model", type=str, default="gpt-4") + parser.add_argument( + "--model-list", + type=str, + nargs="+", + default=None, + help="A list of models to be evaluated", + ) + args = parser.parse_args() + + question_file = "data/mt_bench/question.jsonl" + answer_dir = "data/mt_bench/model_answer" + ref_answer_dir = "data/mt_bench/reference_answer" + + # Load questions + questions = load_questions(question_file, None, None) + + # Load answers + model_answers = load_model_answers(answer_dir) + ref_answers = load_model_answers(ref_answer_dir) + + # Load judge + judge_prompts = load_judge_prompts(args.judge_file) + + if args.model_list is None: + models = get_model_list(answer_dir) + else: + models = args.model_list + + judges = make_judge_single(args.judge_model, judge_prompts) + play_a_match_func = play_a_match_single + output_file = f"data/mt_bench/model_judgment/{args.judge_model}_single.jsonl" + make_match_func = make_match_single + baseline_model = None + + check_data(questions, model_answers, ref_answers, models, judges) + + question_math = [q for q in questions if q["category"] in NEED_REF_CATS] + question_default = [q for q in questions if q["category"] not in NEED_REF_CATS] + + # Make matches + matches = [] + matches += make_match_func( + question_default, models, model_answers, judges["default"], baseline_model + ) + matches += make_match_func( + question_math, + models, + model_answers, + judges["math"], + baseline_model, + ref_answers, + ) + matches += make_match_func( + question_default, + models, + model_answers, + judges["default-mt"], + baseline_model, + multi_turn=True, + ) + matches += make_match_func( + question_math, + models, + model_answers, + judges["math-mt"], + baseline_model, + ref_answers, + multi_turn=True, + ) + + match_stat = {} + match_stat["bench_name"] = "mt_bench" + match_stat["mode"] = "single" + match_stat["judge"] = args.judge_model + match_stat["baseline"] = baseline_model + match_stat["model_list"] = models + match_stat["total_num_questions"] = len(questions) + match_stat["total_num_matches"] = 
len(matches) + match_stat["output_path"] = output_file + + # Show match stats and prompt enter to continue + print("Stats:") + print(json.dumps(match_stat, indent=4)) + input("Press Enter to confirm...") + + # Play matches + for match in tqdm(matches): + play_a_match_func(match, output_file=output_file) diff --git a/benchmarks/flowertune-llm/evaluation/general-nlp/gen_model_answer.py b/benchmarks/flowertune-llm/evaluation/general-nlp/gen_model_answer.py new file mode 100644 index 000000000000..cefb4fbff08d --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/general-nlp/gen_model_answer.py @@ -0,0 +1,135 @@ +""" +This python file is adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/llm_judge/gen_model_answer.py + +FastChat (https://github.com/lm-sys/FastChat) is licensed under the Apache License, Version 2.0. + +Citation: +@misc{zheng2023judging, + title={Judging LLM-as-a-judge with MT-Bench and Chatbot Arena}, + author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu + and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric. P Xing and Hao Zhang + and Joseph E. Gonzalez and Ion Stoica}, + year={2023}, + eprint={2306.05685}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +""" + +import argparse +import json +import os +import random +import time + +import torch +from fastchat.conversation import get_conv_template +from fastchat.llm_judge.common import load_questions, temperature_config +from peft import AutoPeftModelForCausalLM, PeftModel +from tqdm import tqdm +from transformers import AutoModelForCausalLM, AutoTokenizer + +parser = argparse.ArgumentParser() +parser.add_argument("--peft-path", type=str, default=None) +parser.add_argument("--template", type=str, default="vicuna_v1.1") +parser.add_argument("--max-new-token", type=int, default=1024) +parser.add_argument("--num-choices", type=int, default=1) +args = parser.parse_args() + +# Load model and tokenizer +model = AutoPeftModelForCausalLM.from_pretrained( + args.peft_path, torch_dtype=torch.float16 +).to("cuda") +base_model = model.peft_config["default"].base_model_name_or_path +tokenizer = AutoTokenizer.from_pretrained(base_model) + +model_name = base_model.split("/")[1] +question_file = f"./data/mt_bench/question.jsonl" +answer_file = f"./data/mt_bench/model_answer/{model_name}.jsonl" + +# Load questions +questions = load_questions(question_file, None, None) +# Random shuffle the questions to balance the loading +random.shuffle(questions) + +# Generate answers +for question in tqdm(questions): + # Set temperature value + temperature = ( + temperature_config[question["category"]] + if question["category"] in temperature_config + else 0.7 + ) + choices = [] + for i in range(args.num_choices): + torch.manual_seed(i) + conv = get_conv_template(args.template) + turns = [] + for j in range(len(question["turns"])): + qs = question["turns"][j] + conv.append_message(conv.roles[0], qs) + conv.append_message(conv.roles[1], None) + prompt = conv.get_prompt() + input_ids = tokenizer([prompt]).input_ids + + do_sample = False if temperature < 1e-4 else True + + # Some models may error out when generating long outputs + try: + output_ids = model.generate( + input_ids=torch.as_tensor(input_ids).cuda(), + do_sample=do_sample, + temperature=temperature, + max_new_tokens=args.max_new_token, + pad_token_id=tokenizer.eos_token_id, + ) + output_ids = ( + output_ids[0] + if model.config.is_encoder_decoder + else output_ids[0][len(input_ids[0]) :] + ) + + # Be consistent with the 
template's stop_token_ids + if conv.stop_token_ids: + stop_token_ids_index = [ + i + for i, id in enumerate(output_ids) + if id in conv.stop_token_ids + ] + if len(stop_token_ids_index) > 0: + output_ids = output_ids[: stop_token_ids_index[0]] + + output = tokenizer.decode( + output_ids, + spaces_between_special_tokens=False, + ) + if conv.stop_str and output.find(conv.stop_str) > 0: + output = output[: output.find(conv.stop_str)] + for special_token in tokenizer.special_tokens_map.values(): + if isinstance(special_token, list): + for special_tok in special_token: + output = output.replace(special_tok, "") + else: + output = output.replace(special_token, "") + output = output.strip() + + if conv.name == "xgen" and output.startswith("Assistant:"): + output = output.replace("Assistant:", "", 1).strip() + except RuntimeError as e: + print("ERROR question ID: ", question["question_id"]) + output = "ERROR" + + conv.update_last_message(output) + turns.append(output) + choices.append({"index": i, "turns": turns}) + + # Dump answers + os.makedirs(os.path.dirname(answer_file), exist_ok=True) + with open(os.path.expanduser(answer_file), "a") as fout: + ans_json = { + "question_id": question["question_id"], + "model_id": model_name, + "choices": choices, + "tstamp": time.time(), + } + fout.write(json.dumps(ans_json) + "\n") diff --git a/benchmarks/flowertune-llm/evaluation/general-nlp/requirements.txt b/benchmarks/flowertune-llm/evaluation/general-nlp/requirements.txt new file mode 100644 index 000000000000..7a0f43b98698 --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/general-nlp/requirements.txt @@ -0,0 +1,6 @@ +peft==0.6.2 +sentencepiece==0.2.0 +protobuf==5.27.1 +fschat[model_worker,webui]==0.2.35 +openai==0.28.0 +anthropic==0.18.1 diff --git a/benchmarks/flowertune-llm/evaluation/general-nlp/show_result.py b/benchmarks/flowertune-llm/evaluation/general-nlp/show_result.py new file mode 100644 index 000000000000..6a00c10bbdba --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/general-nlp/show_result.py @@ -0,0 +1,36 @@ +""" +This python file is adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/llm_judge/show_result.py + +FastChat (https://github.com/lm-sys/FastChat) is licensed under the Apache License, Version 2.0. + +Citation: +@misc{zheng2023judging, + title={Judging LLM-as-a-judge with MT-Bench and Chatbot Arena}, + author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu + and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric. P Xing and Hao Zhang + and Joseph E. 
Gonzalez and Ion Stoica}, + year={2023}, + eprint={2306.05685}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +""" + +import argparse + +from fastchat.llm_judge.show_result import display_result_single + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input-file", type=str, default=None) + parser.add_argument("--bench-name", type=str, default="mt_bench") + parser.add_argument("--judge-model", type=str, default="gpt-4") + parser.add_argument( + "--model-list", + type=str, + nargs="+", + default=None, + help="A list of models to be evaluated", + ) + args = parser.parse_args() + display_result_single(args) diff --git a/benchmarks/flowertune-llm/evaluation/medical/README.md b/benchmarks/flowertune-llm/evaluation/medical/README.md new file mode 100644 index 000000000000..78de069460d8 --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/medical/README.md @@ -0,0 +1,38 @@ +# Evaluation for Medical challenge + +We build a medical question answering (QA) pipeline to evaluate our fine-tuned LLMs. +Three datasets have been selected for this evaluation: [PubMedQA](https://huggingface.co/datasets/bigbio/pubmed_qa), [MedMCQA](https://huggingface.co/datasets/medmcqa), and [MedQA](https://huggingface.co/datasets/bigbio/med_qa). + + +## Environment Setup + +```shell +git clone --depth=1 https://github.com/adap/flower.git && mv flower/benchmarks/flowertune-llm/evaluation/medical ./flowertune-eval-medical && rm -rf flower && cd flowertune-eval-medical +``` + +Create a new Python environment (we recommend Python 3.10), activate it, then install dependencies with: + +```shell +# From a new python environment, run: +pip install -r requirements.txt + +# Log in HuggingFace account +huggingface-cli login +``` + +## Generate model decision & calculate accuracy + +```bash +python eval.py \ +--peft-path=/path/to/fine-tuned-peft-model-dir/ \ # e.g., ./peft_1 +--run-name=fl \ # a name for this run +--batch-size=16 \ +--quantization=4 \ +--datasets=pubmedqa,medmcqa,medqa +``` + +The model answers and accuracy values will be saved to `benchmarks/generation_{dataset_name}_{run_name}.jsonl` and `benchmarks/acc_{dataset_name}_{run_name}.txt`, respectively. + + +> [!NOTE] +> Please ensure that you provide all **three accuracy values (PubMedQA, MedMCQA, MedQA)** for the three evaluation datasets when submitting to the LLM Leaderboard (see the [`Make Submission`](https://github.com/adap/flower/tree/main/benchmarks/flowertune-llm/evaluation#make-submission-on-flowertune-llm-leaderboard) section). diff --git a/benchmarks/flowertune-llm/evaluation/medical/benchmarks.py b/benchmarks/flowertune-llm/evaluation/medical/benchmarks.py new file mode 100644 index 000000000000..c72e2a7894da --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/medical/benchmarks.py @@ -0,0 +1,174 @@ +import json + +import pandas as pd +from sklearn.metrics import accuracy_score +from torch.utils.data import DataLoader +from tqdm import tqdm +from utils import format_answer, format_example, save_results + +import datasets + +# The instructions refer to Meditron evaluation: +# https://github.com/epfLLM/meditron/blob/main/evaluation/instructions.json +INSTRUCTIONS = { + "pubmedqa": "As an expert doctor in clinical science and medical knowledge, can you tell me if the following statement is correct? Answer yes, no, or maybe.", + "medqa": "You are a medical doctor taking the US Medical Licensing Examination.
You need to demonstrate your understanding of basic and clinical science, medical knowledge, and mechanisms underlying health, disease, patient care, and modes of therapy. Show your ability to apply the knowledge essential for medical practice. For the following multiple-choice question, select one correct answer from A to E. Base your answer on the current and standard practices referenced in medical guidelines.", + "medmcqa": "You are a medical doctor answering realworld medical entrance exam questions. Based on your understanding of basic and clinical science, medical knowledge, and mechanisms underlying health, disease, patient care, and modes of therapy, answer the following multiple-choice question. Select one correct answer from A to D. Base your answer on the current and standard practices referenced in medical guidelines.", +} + + +def infer_pubmedqa(model, tokenizer, batch_size, run_name): + name = "pubmedqa" + answer_type = "boolean" + dataset = datasets.load_dataset( + "bigbio/pubmed_qa", + "pubmed_qa_labeled_fold0_source", + split="test", + trust_remote_code=True, + ) + # Post process + instruction = INSTRUCTIONS[name] + + def post_process(row): + context = "\n".join(row["CONTEXTS"]) + row["prompt"] = f"{context}\n{row['QUESTION']}" + row["gold"] = row["final_decision"] + row["long_answer"] = row["LONG_ANSWER"] + row["prompt"] = f"{instruction}\n{row['prompt']}\nThe answer is:\n" + return row + + dataset = dataset.map(post_process) + + # Generate results + generate_results(name, run_name, dataset, model, tokenizer, batch_size, answer_type) + + +def infer_medqa(model, tokenizer, batch_size, run_name): + name = "medqa" + answer_type = "mcq" + dataset = datasets.load_dataset( + "bigbio/med_qa", + "med_qa_en_4options_source", + split="test", + trust_remote_code=True, + ) + + # Post process + instruction = INSTRUCTIONS[name] + + def post_process(row): + choices = [opt["value"] for opt in row["options"]] + row["prompt"] = format_example(row["question"], choices) + for opt in row["options"]: + if opt["value"] == row["answer"]: + row["gold"] = opt["key"] + break + row["prompt"] = f"{instruction}\n{row['prompt']}\nThe answer is:\n" + return row + + dataset = dataset.map(post_process) + + # Generate results + generate_results(name, run_name, dataset, model, tokenizer, batch_size, answer_type) + + +def infer_medmcqa(model, tokenizer, batch_size, run_name): + name = "medmcqa" + answer_type = "mcq" + dataset = datasets.load_dataset( + "medmcqa", split="validation", trust_remote_code=True + ) + + # Post process + instruction = INSTRUCTIONS[name] + + def post_process(row): + options = [row["opa"], row["opb"], row["opc"], row["opd"]] + answer = int(row["cop"]) + row["prompt"] = format_example(row["question"], options) + row["gold"] = chr(ord("A") + answer) if answer in [0, 1, 2, 3] else None + row["prompt"] = f"{instruction}\n{row['prompt']}\nThe answer is:\n" + return row + + dataset = dataset.map(post_process) + + # Generate results + generate_results(name, run_name, dataset, model, tokenizer, batch_size, answer_type) + + +def generate_results( + name, run_name, dataset, model, tokenizer, batch_size, answer_type +): + # Run inference + prediction = inference(dataset, model, tokenizer, batch_size) + + # Calculate accuracy + acc = accuracy_compute(prediction, answer_type) + + # Save results and generations + save_results(name, run_name, prediction, acc) + + +def inference(dataset, model, tokenizer, batch_size): + columns_process = ["prompt", "gold"] + dataset_process = pd.DataFrame(dataset, 
columns=dataset.features)[columns_process] + dataset_process = dataset_process.assign(output="Null") + temperature = 1.0 + + inference_data = json.loads(dataset_process.to_json(orient="records")) + data_loader = DataLoader(inference_data, batch_size=batch_size, shuffle=False) + + batch_counter = 0 + for batch in tqdm(data_loader, total=len(data_loader), position=0, leave=True): + prompts = [ + f"<|im_start|>question\n{prompt}<|im_end|>\n<|im_start|>answer\n" + for prompt in batch["prompt"] + ] + if batch_counter == 0: + print(prompts[0]) + + # Process tokenizer + stop_seq = ["###"] + if tokenizer.eos_token is not None: + stop_seq.append(tokenizer.eos_token) + if tokenizer.pad_token is not None: + stop_seq.append(tokenizer.pad_token) + max_new_tokens = len( + tokenizer(batch["gold"][0], add_special_tokens=False)["input_ids"] + ) + + outputs = [] + for prompt in prompts: + input_ids = tokenizer.encode(prompt, return_tensors="pt").to("cuda") + output_ids = model.generate( + inputs=input_ids, + max_new_tokens=max_new_tokens, + do_sample=False, + top_p=1.0, + temperature=temperature, + pad_token_id=tokenizer.eos_token_id, + ) + output_ids = output_ids[0][len(input_ids[0]) :] + output = tokenizer.decode(output_ids, skip_special_tokens=True) + outputs.append(output) + + for prompt, out in zip(batch["prompt"], outputs): + dataset_process.loc[dataset_process["prompt"] == prompt, "output"] = out + batch_counter += 1 + + return dataset_process + + +def accuracy_compute(dataset, answer_type): + dataset = json.loads(dataset.to_json(orient="records")) + preds, golds = [], [] + for row in dataset: + answer = row["gold"].lower() + output = row["output"].lower() + pred, gold = format_answer(output, answer, answer_type=answer_type) + preds.append(pred) + golds.append(gold) + + accuracy = accuracy_score(preds, golds) + + return accuracy diff --git a/benchmarks/flowertune-llm/evaluation/medical/eval.py b/benchmarks/flowertune-llm/evaluation/medical/eval.py new file mode 100644 index 000000000000..7405e1493e4d --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/medical/eval.py @@ -0,0 +1,62 @@ +import argparse + +import torch +from peft import PeftModel +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + +from benchmarks import infer_medmcqa, infer_medqa, infer_pubmedqa + +# Fixed seed +torch.manual_seed(2024) + +parser = argparse.ArgumentParser() +parser.add_argument( + "--base-model-name-path", type=str, default="mistralai/Mistral-7B-v0.3" +) +parser.add_argument("--run-name", type=str, default="fl") +parser.add_argument("--peft-path", type=str, default=None) +parser.add_argument( + "--datasets", + type=str, + default="pubmedqa", + help="The dataset to infer on: [pubmedqa, medqa, medmcqa]", +) +parser.add_argument("--batch-size", type=int, default=16) +parser.add_argument("--quantization", type=int, default=4) +args = parser.parse_args() + + +# Load model and tokenizer +if args.quantization == 4: + quantization_config = BitsAndBytesConfig(load_in_4bit=True) + torch_dtype = torch.float32 +elif args.quantization == 8: + quantization_config = BitsAndBytesConfig(load_in_8bit=True) + torch_dtype = torch.float16 +else: + raise ValueError( + f"Use 4-bit or 8-bit quantization. 
You passed: {args.quantization}/" + ) + +model = AutoModelForCausalLM.from_pretrained( + args.base_model_name_path, + quantization_config=quantization_config, + torch_dtype=torch_dtype, +) +if args.peft_path is not None: + model = PeftModel.from_pretrained( + model, args.peft_path, torch_dtype=torch_dtype + ).to("cuda") + +tokenizer = AutoTokenizer.from_pretrained(args.base_model_name_path) + +# Evaluate +for dataset in args.datasets.split(","): + if dataset == "pubmedqa": + infer_pubmedqa(model, tokenizer, args.batch_size, args.run_name) + elif dataset == "medqa": + infer_medqa(model, tokenizer, args.batch_size, args.run_name) + elif dataset == "medmcqa": + infer_medmcqa(model, tokenizer, args.batch_size, args.run_name) + else: + raise ValueError("Undefined Dataset.") diff --git a/benchmarks/flowertune-llm/evaluation/medical/requirements.txt b/benchmarks/flowertune-llm/evaluation/medical/requirements.txt new file mode 100644 index 000000000000..adfc8b0c59db --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/medical/requirements.txt @@ -0,0 +1,7 @@ +peft==0.6.2 +pandas==2.2.2 +scikit-learn==1.5.0 +datasets==2.20.0 +sentencepiece==0.2.0 +protobuf==5.27.1 +bitsandbytes==0.43.1 diff --git a/benchmarks/flowertune-llm/evaluation/medical/utils.py b/benchmarks/flowertune-llm/evaluation/medical/utils.py new file mode 100644 index 000000000000..44d0763d39d4 --- /dev/null +++ b/benchmarks/flowertune-llm/evaluation/medical/utils.py @@ -0,0 +1,81 @@ +import os +import re + + +def format_example(question, choices): + if not question.endswith("?") and not question.endswith("."): + question += "?" + options_str = "\n".join([f"{chr(65+i)}. {choices[i]}" for i in range(len(choices))]) + prompt = "Question: " + question + "\n\nOptions:\n" + options_str + return prompt + + +def save_results(dataset_name, run_name, dataset, acc): + path = "./benchmarks/" + if not os.path.exists(path): + os.makedirs(path) + + # Save results + results_path = os.path.join(path, f"acc_{dataset_name}_{run_name}.txt") + with open(results_path, "w") as f: + f.write(f"Accuracy: {acc}. ") + print(f"Accuracy: {acc}. 
") + + # Save generations + generation_path = os.path.join(path, f"generation_{dataset_name}_{run_name}.jsonl") + dataset.to_json(generation_path, orient="records") + + +def format_answer(output_full, answer, answer_type="mcq"): + output = output_full + default = (output_full, answer) + if "\n##" in output: + try: + output = output.split("\n##")[1].split("\n")[0].strip().lower() + except Exception: + return default + if "###" in answer: + try: + answer = answer.split("answer is:")[1].split("###")[0].strip() + except Exception: + return default + + output = re.sub(r"[^a-zA-Z0-9]", " ", output).strip() + output = re.sub(" +", " ", output) + + if answer_type == "boolean": + output = clean_boolean_answer(output) + elif answer_type == "mcq": + output = clean_mcq_answer(output) + + if output in ["a", "b", "c", "d", "e", "yes", "no"]: + return output, answer + else: + return default + + +def clean_mcq_answer(output): + output = clean_answer(output) + try: + output = output[0] + except Exception: + return output + return output + + +def clean_boolean_answer(output): + if "yesyes" in output: + output = output.replace("yesyes", "yes") + elif "nono" in output: + output = output.replace("nono", "no") + elif "yesno" in output: + output = output.replace("yesno", "yes") + elif "noyes" in output: + output = output.replace("noyes", "no") + output = clean_answer(output) + return output + + +def clean_answer(output): + output_clean = output.encode("ascii", "ignore").decode("ascii") + return output_clean diff --git a/doc/source/conf.py b/doc/source/conf.py index d3881325a5ce..c645c556c603 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -90,10 +90,10 @@ author = "The Flower Authors" # The full version of the next release, including alpha/beta/rc tags -release = "1.11.0" +release = "1.12.0" # The current released version rst_prolog = """ -.. |stable_flwr_version| replace:: 1.10.0 +.. |stable_flwr_version| replace:: 1.11.0 .. |stable_flwr_superlink_docker_digest| replace:: 4b317d5b6030710b476f4dbfab2c3a33021ad40a0fcfa54d7edd45e0c51d889c .. |ubuntu_version| replace:: 22.04 .. |setuptools_version| replace:: 70.3.0 @@ -264,6 +264,7 @@ def find_test_modules(package_path): "example-mxnet-walk-through": "index.html", "ref-api/flwr.simulation.run_simulation_from_cli": "index.html", "contributor-how-to-create-new-messages": "index.html", + "example-jax-from-centralized-to-federated": "tutorial-quickstart-jax.html", } # -- Options for HTML output ------------------------------------------------- diff --git a/doc/source/contributor-explanation-architecture.rst b/doc/source/contributor-explanation-architecture.rst index a20a84313118..48b43cf3f2b8 100644 --- a/doc/source/contributor-explanation-architecture.rst +++ b/doc/source/contributor-explanation-architecture.rst @@ -1,6 +1,8 @@ Flower Architecture =================== +This document provides an overview of the Flower architecture. The architecture is designed to be modular and flexible, and can use two different types of engines: Deployment Engine and Simulation Engine. + Edge Client Engine ------------------ diff --git a/doc/source/contributor-how-to-build-docker-images.rst b/doc/source/contributor-how-to-build-docker-images.rst index 522d124dfd9b..d6acad4afa03 100644 --- a/doc/source/contributor-how-to-build-docker-images.rst +++ b/doc/source/contributor-how-to-build-docker-images.rst @@ -26,7 +26,7 @@ Before we can start, we need to meet a few prerequisites in our local developmen default values, others must be specified when building the image. 
All available build arguments for each image are listed in one of the tables below. -Building the base image +Building the Base Image ----------------------- .. list-table:: @@ -65,6 +65,10 @@ Building the base image - The Flower package to be installed. - No - ``flwr`` or ``flwr-nightly`` + * - ``FLWR_VERSION_REF`` + - A `direct reference `_ without the ``@`` specifier. If both ``FLWR_VERSION`` and ``FLWR_VERSION_REF`` are specified, the ``FLWR_VERSION_REF`` has precedence. + - No + - `Direct Reference Examples`_ The following example creates a base Ubuntu/Alpine image with Python ``3.11.0``, pip :substitution-code:`|pip_version|`, setuptools :substitution-code:`|setuptools_version|` @@ -84,8 +88,8 @@ and Flower :substitution-code:`|stable_flwr_version|`: In this example, we specify our image name as ``flwr_base`` and the tag as ``0.1.0``. Remember that the build arguments as well as the name and tag can be adapted to your needs. These values serve as examples only. -Building the SuperLink/SuperNode or ServerApp image ---------------------------------------------------- +Building a Flower Binary Image +------------------------------ .. list-table:: :widths: 25 45 15 15 @@ -130,3 +134,21 @@ After creating the image, we can test whether the image is working: .. code-block:: bash $ docker run --rm flwr_superlink:0.1.0 --help + +Direct Reference Examples +------------------------- + +.. code-block:: bash + :substitutions: + + # main branch + git+https://github.com/adap/flower.git@main + + # commit hash + git+https://github.com/adap/flower.git@1187c707f1894924bfa693d99611cf6f93431835 + + # tag + git+https://github.com/adap/flower.git@|stable_flwr_version| + + # artifact store + https://artifact.flower.ai/py/main/latest/flwr-|stable_flwr_version|-py3-none-any.whl diff --git a/doc/source/docker/index.rst b/doc/source/docker/index.rst index a070a47cb853..ac6124b4c138 100644 --- a/doc/source/docker/index.rst +++ b/doc/source/docker/index.rst @@ -33,11 +33,12 @@ Advanced Options set-environment-variables run-as-root-user + run-as-subprocess pin-version use-a-different-version -Run Flower Docker Compose -------------------------- +Run Flower using Docker Compose +------------------------------- .. toctree:: :maxdepth: 1 diff --git a/doc/source/docker/run-as-subprocess.rst b/doc/source/docker/run-as-subprocess.rst new file mode 100644 index 000000000000..f8c482f632a0 --- /dev/null +++ b/doc/source/docker/run-as-subprocess.rst @@ -0,0 +1,53 @@ +Run ClientApp as a Subprocess +============================= + +In this mode, the ClientApp is executed as a subprocess within the SuperNode Docker container, +rather than running in a separate container. This approach reduces the number of running containers, +which can be beneficial for environments with limited resources. However, it also means that the +ClientApp is no longer isolated from the SuperNode, which may introduce additional security +concerns. + +Prerequisites +------------- + +#. Before running the ClientApp as a subprocess, ensure that the FAB dependencies have been installed + in the SuperNode images. This can be done by extending the SuperNode image: + + .. code-block:: dockerfile + :caption: Dockerfile.supernode + :linenos: + :substitutions: + + FROM flwr/supernode:|stable_flwr_version| + + WORKDIR /app + COPY pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flower-supernode"] + +#. 
Next, build the SuperNode Docker image by running the following command in the directory where + the Dockerfile is located: + + .. code-block:: shell + + $ docker build -f Dockerfile.supernode -t flwr_supernode:0.0.1 . + + +Run the ClientApp as a Subprocess +--------------------------------- + +Start the SuperNode with the flag ``--isolation subprocess``, which tells the SuperNode to execute +the ClientApp as a subprocess: + +.. code-block:: shell + + $ docker run --rm \ + --detach \ + flwr_supernode:0.0.1 \ + --insecure \ + --superlink superlink:9092 \ + --node-config "partition-id=1 num-partitions=2" \ + --supernode-address localhost:9094 \ + --isolation subprocess diff --git a/doc/source/docker/tutorial-quickstart-docker-compose.rst b/doc/source/docker/tutorial-quickstart-docker-compose.rst index 93a000295951..49cef55ec5a2 100644 --- a/doc/source/docker/tutorial-quickstart-docker-compose.rst +++ b/doc/source/docker/tutorial-quickstart-docker-compose.rst @@ -44,7 +44,7 @@ Step 1: Set Up Setting the ``PROJECT_DIR`` helps Docker Compose locate the ``pyproject.toml`` file, allowing it to install dependencies in the SuperExec and SuperNode images correctly. -Step 2: Run Flower in insecure mode +Step 2: Run Flower in Insecure Mode ----------------------------------- To begin, start Flower with the most basic configuration. In this setup, Flower @@ -230,7 +230,7 @@ Step 6: Run Flower with TLS [tool.flwr.federations.docker-compose-tls] address = "127.0.0.1:9093" - root-certificates = "superexec-certificates/ca.crt" + root-certificates = "../superexec-certificates/ca.crt" #. Restart the services with TLS enabled: @@ -248,43 +248,57 @@ Step 6: Run Flower with TLS Step 7: Add another SuperNode ----------------------------- -You can add more SuperNodes by duplicating the SuperNode definition in the ``compose.yml`` file. +You can add more SuperNodes and ClientApps by duplicating their definitions in the ``compose.yml`` +file. -Just make sure to give each new SuperNode service a unique service name like ``supernode-3``, ``supernode-4``, etc. +Just give each new SuperNode and ClientApp service a unique service name like ``supernode-3``, +``clientapp-3``, etc. In ``compose.yml``, add the following: .. code-block:: yaml :caption: compose.yml + :substitutions: - services: # other service definitions supernode-3: - user: root - deploy: - resources: - limits: - cpus: "2" + image: flwr/supernode:${FLWR_VERSION:-|stable_flwr_version|} command: + - --insecure - --superlink - superlink:9092 - - --insecure + - --supernode-address + - 0.0.0.0:9096 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" depends_on: - superlink - volumes: - - apps-volume:/app/.flwr/apps/:ro + + clientapp-3: build: context: ${PROJECT_DIR:-.} dockerfile_inline: | - FROM flwr/supernode:${FLWR_VERSION:-1.10.0} + FROM flwr/clientapp:${FLWR_VERSION:-|stable_flwr_version|} WORKDIR /app COPY --chown=app:app pyproject.toml . RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ && python -m pip install -U --no-cache-dir . - ENTRYPOINT ["flower-supernode", "--node-config", "partition-id=0,num-partitions=2"] + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-3:9096 + deploy: + resources: + limits: + cpus: "2" + stop_signal: SIGINT + depends_on: + - supernode-3 If you also want to enable TLS for the new SuperNodes, duplicate the SuperNode definition for each new SuperNode service in the ``with-tls.yml`` file. @@ -296,13 +310,18 @@ In ``with-tls.yml``, add the following: ..
code-block:: yaml :caption: with-tls.yml - services: # other service definitions supernode-3: command: - --superlink - superlink:9092 + - --supernode-address + - 0.0.0.0:9096 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" - --root-certificates - certificates/ca.crt secrets: @@ -315,14 +334,13 @@ Step 8: Persisting the SuperLink State and Enabling TLS To run Flower with persisted SuperLink state and enabled TLS, a slight change in the ``with-state.yml`` file is required: -#. Comment out the lines 3-5 and uncomment the lines 6-10: +#. Comment out the lines 2-4 and uncomment the lines 5-9: .. code-block:: yaml :caption: with-state.yml :linenos: - :emphasize-lines: 3-10 + :emphasize-lines: 2-9 - services: superlink: # command: # - --insecure diff --git a/doc/source/docker/tutorial-quickstart-docker.rst b/doc/source/docker/tutorial-quickstart-docker.rst index 29ae6d5f6a43..189d019cb097 100644 --- a/doc/source/docker/tutorial-quickstart-docker.rst +++ b/doc/source/docker/tutorial-quickstart-docker.rst @@ -66,8 +66,8 @@ Open your terminal and run: * ``docker run``: This tells Docker to run a container from an image. * ``--rm``: Remove the container once it is stopped or the command exits. * | ``-p 9091:9091 -p 9092:9092``: Map port ``9091`` and ``9092`` of the container to the same port of - | the host machine, allowing you to access the Driver API on ``http://localhost:9091`` and - | the Fleet API on ``http://localhost:9092``. + | the host machine, allowing other services to access the Driver API on + | ``http://localhost:9091`` and the Fleet API on ``http://localhost:9092``. * ``--network flwr-network``: Make the container join the network named ``flwr-network``. * ``--name superlink``: Assign the name ``superlink`` to the container. * ``--detach``: Run the container in the background, freeing up the terminal. @@ -79,32 +79,92 @@ Open your terminal and run: Step 3: Start the SuperNode --------------------------- -The SuperNode Docker image comes with a pre-installed version of Flower and serves as a base for -building your own SuperNode image. +Start two SuperNode containers. -#. Create a SuperNode Dockerfile called ``Dockerfile.supernode`` and paste the following code into it: +#. Start the first container: + + .. code-block:: bash + :substitutions: + + $ docker run --rm \ + -p 9094:9094 \ + --network flwr-network \ + --name supernode-1 \ + --detach \ + flwr/supernode:|stable_flwr_version| \ + --insecure \ + --superlink superlink:9092 \ + --node-config "partition-id=0 num-partitions=2" \ + --supernode-address 0.0.0.0:9094 \ + --isolation process + + .. dropdown:: Understand the command + + * ``docker run``: This tells Docker to run a container from an image. + * ``--rm``: Remove the container once it is stopped or the command exits. + * | ``-p 9094:9094``: Map port ``9094`` of the container to the same port of + | the host machine, allowing other services to access the SuperNode API on + | ``http://localhost:9094``. + * ``--network flwr-network``: Make the container join the network named ``flwr-network``. + * ``--name supernode-1``: Assign the name ``supernode-1`` to the container. + * ``--detach``: Run the container in the background, freeing up the terminal. + * | ``flwr/supernode:|stable_flwr_version|``: This is the name of the image to be run and the specific tag + | of the image. + * | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing + | unencrypted communication. 
+ * | ``--superlink superlink:9092``: Connect to the SuperLink's Fleet API at the address + | ``superlink:9092``. + * | ``--node-config "partition-id=0 num-partitions=2"``: Set the partition ID to ``0`` and the + | number of partitions to ``2`` for the SuperNode configuration. + * | ``--supernode-address 0.0.0.0:9094``: Set the address and port number that the SuperNode + | is listening on. + * | ``--isolation process``: Tells the SuperNode that the ClientApp is created by a separate, + | independent process. The SuperNode does not attempt to create it. + +#. Start the second container: + + .. code-block:: shell + :substitutions: + + $ docker run --rm \ + -p 9095:9095 \ + --network flwr-network \ + --name supernode-2 \ + --detach \ + flwr/supernode:|stable_flwr_version| \ + --insecure \ + --superlink superlink:9092 \ + --node-config "partition-id=1 num-partitions=2" \ + --supernode-address 0.0.0.0:9095 \ + --isolation process + +Step 4: Start the ClientApp +--------------------------- + +The ClientApp Docker image comes with a pre-installed version of Flower and serves as a base for +building your own ClientApp image. In order to install the FAB dependencies, you will need to create +a Dockerfile that extends the ClientApp image and installs the required dependencies. + +#. Create a ClientApp Dockerfile called ``Dockerfile.clientapp`` and paste the following code into it: .. code-block:: dockerfile - :caption: Dockerfile.supernode + :caption: Dockerfile.clientapp :linenos: :substitutions: - FROM flwr/supernode:|stable_flwr_version| + FROM flwr/clientapp:|stable_flwr_version| WORKDIR /app COPY pyproject.toml . RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ && python -m pip install -U --no-cache-dir . - COPY flower.quickstart-docker.1-0-0.fab . - RUN flwr install flower.quickstart-docker.1-0-0.fab - - ENTRYPOINT ["flower-supernode"] + ENTRYPOINT ["flwr-clientapp"] .. dropdown:: Understand the Dockerfile - * | :substitution-code:`FROM flwr/supernode:|stable_flwr_version|`: This line specifies that the Docker image - | to be built from is the ``flwr/supernode image``, version :substitution-code:`|stable_flwr_version|`. + * | :substitution-code:`FROM flwr/clientapp:|stable_flwr_version|`: This line specifies that the Docker image + | to be built from is the ``flwr/clientapp`` image, version :substitution-code:`|stable_flwr_version|`. * | ``WORKDIR /app``: Set the working directory for the container to ``/app``. | Any subsequent commands that reference a directory will be relative to this directory. * | ``COPY pyproject.toml .``: Copy the ``pyproject.toml`` file @@ -116,51 +176,37 @@ building your own SuperNode image. | | The ``-U`` flag indicates that any existing packages should be upgraded, and | ``--no-cache-dir`` prevents pip from using the cache to speed up the installation. - * | ``COPY flower.quickstart-docker.1-0-0.fab .``: Copy the - | ``flower.quickstart-docker.1-0-0.fab`` file from the current working directory into - | the container's ``/app`` directory. - * | ``RUN flwr install flower.quickstart-docker.1-0-0.fab``: Run the ``flwr`` install command - | to install the Flower App Bundle locally. - * | ``ENTRYPOINT ["flwr-clientapp"]``: Set the command ``flwr-clientapp`` to be | the default command run when the container is started. ..
important:: - Note that `flwr `__ is already installed in the ``flwr/supernode`` + Note that `flwr `__ is already installed in the ``flwr/clientapp`` base image, so only other package dependencies such as ``flwr-datasets``, ``torch``, etc., need to be installed. As a result, the ``flwr`` dependency is removed from the ``pyproject.toml`` after it has been copied into the Docker image (see line 5). -#. Build the Flower App Bundle (FAB): - - .. code-block:: bash - - $ flwr build - -#. Next, build the SuperNode Docker image by running the following command in the directory where - Dockerfile is located: +#. Next, build the ClientApp Docker image by running the following command in the directory where + the Dockerfile is located: .. code-block:: bash - $ docker build -f Dockerfile.supernode -t flwr_supernode:0.0.1 . + $ docker build -f Dockerfile.clientapp -t flwr_clientapp:0.0.1 . .. note:: - The image name was set as ``flwr_supernode`` with the tag ``0.0.1``. Remember that + The image name was set as ``flwr_clientapp`` with the tag ``0.0.1``. Remember that these values are merely examples, and you can customize them according to your requirements. -#. Start the first SuperNode container: +#. Start the first ClientApp container: .. code-block:: bash $ docker run --rm \ --network flwr-network \ --detach \ - flwr_supernode:0.0.1 \ - --insecure \ - --superlink superlink:9092 \ - --node-config \ - partition-id=0,num-partitions=2 + flwr_clientapp:0.0.1 \ + --supernode supernode-1:9094 .. dropdown:: Understand the command @@ -168,35 +214,28 @@ building your own SuperNode image. * ``--rm``: Remove the container once it is stopped or the command exits. * ``--network flwr-network``: Make the container join the network named ``flwr-network``. * ``--detach``: Run the container in the background, freeing up the terminal. - * | ``flwr_supernode:0.0.1``: This is the name of the image to be run and the specific tag + * | ``flwr_clientapp:0.0.1``: This is the name of the image to be run and the specific tag | of the image. - * | ``--insecure``: This flag tells the container to operate in an insecure mode, allowing - | unencrypted communication. - * | ``--superlink superlink:9092``: Connect to the SuperLinks Fleet API on the address - | ``superlink:9092``. - * | ``--node-config partition-id=0,num-partitions=2``: Set the partition ID to ``0`` and the - | number of partitions to ``2`` for the SuperNode configuration. + * | ``--supernode supernode-1:9094``: Connect to the SuperNode's Fleet API at the address + | ``supernode-1:9094``. -#. Start the second SuperNode container: +#. Start the second ClientApp container: .. code-block:: shell $ docker run --rm \ --network flwr-network \ --detach \ - flwr_supernode:0.0.1 \ - --insecure \ - --superlink superlink:9092 \ - --node-config \ - partition-id=1,num-partitions=2 + flwr_clientapp:0.0.1 \ + --supernode supernode-2:9095 -Step 4: Start the SuperExec +Step 5: Start the SuperExec --------------------------- -The procedure for building and running a SuperExec image is almost identical to the SuperNode image. +The procedure for building and running a SuperExec image is almost identical to that of the ClientApp image. -Similar to the SuperNode image, the SuperExec Docker image comes with a pre-installed version of -Flower and serves as a base for building your own SuperExec image. +Similar to the ClientApp image, you will need to create a Dockerfile that extends the SuperExec +image and installs the required FAB dependencies. #. 
Create a SuperExec Dockerfile called ``Dockerfile.superexec`` and paste the following code in: @@ -254,8 +293,7 @@ Flower and serves as a base for building your own SuperExec image. --detach \ flwr_superexec:0.0.1 \ --insecure \ - --executor-config \ - superlink=\"superlink:9091\" + --executor-config superlink=\"superlink:9091\" .. dropdown:: Understand the command @@ -273,7 +311,7 @@ Flower and serves as a base for building your own SuperExec image. * | ``--executor-config superlink=\"superlink:9091\"``: Configure the SuperExec executor to | connect to the SuperLink running on port ``9091``. -Step 5: Run the Quickstart Project +Step 6: Run the Quickstart Project ---------------------------------- #. Add the following lines to the ``pyproject.toml``: @@ -297,7 +335,7 @@ Step 5: Run the Quickstart Project $ docker logs -f superexec -Step 6: Update the Application +Step 7: Update the Application ------------------------------ #. Change the application code. For example, change the ``seed`` in ``quickstart_docker/task.py`` @@ -310,39 +348,32 @@ Step 6: Update the Application partition_train_test = partition.train_test_split(test_size=0.2, seed=43) # ... -#. Stop the current SuperNode containers: +#. Stop the current ClientApp containers: .. code-block:: bash - $ docker stop $(docker ps -a -q --filter ancestor=flwr_supernode:0.0.1) + $ docker stop $(docker ps -a -q --filter ancestor=flwr_clientapp:0.0.1) -#. Rebuild the FAB and SuperNode image: +#. Rebuild the ClientApp image: .. code-block:: bash - $ flwr build - $ docker build -f Dockerfile.supernode -t flwr_supernode:0.0.1 . + $ docker build -f Dockerfile.clientapp -t flwr_clientapp:0.0.1 . -#. Launch two new SuperNode containers based on the newly built image: +#. Launch two new ClientApp containers based on the newly built image: .. code-block:: bash $ docker run --rm \ --network flwr-network \ --detach \ - flwr_supernode:0.0.1 \ - --insecure \ - --superlink superlink:9092 \ - --node-config \ - partition-id=0,num-partitions=2 + flwr_clientapp:0.0.1 \ + --supernode supernode-1:9094 $ docker run --rm \ --network flwr-network \ --detach \ - flwr_supernode:0.0.1 \ - --insecure \ - --superlink superlink:9092 \ - --node-config \ - partition-id=1,num-partitions=2 + flwr_clientapp:0.0.1 \ + --supernode supernode-2:9095 #. Run the updated project: @@ -350,14 +381,16 @@ Step 6: Update the Application $ flwr run . docker -Step 7: Clean Up +Step 8: Clean Up ---------------- Remove the containers and the bridge network: .. code-block:: bash - $ docker stop $(docker ps -a -q --filter ancestor=flwr_supernode:0.0.1) \ + $ docker stop $(docker ps -a -q --filter ancestor=flwr_clientapp:0.0.1) \ + supernode-1 \ + supernode-2 \ superexec \ superlink $ docker network rm flwr-network diff --git a/doc/source/example-jax-from-centralized-to-federated.rst b/doc/source/example-jax-from-centralized-to-federated.rst deleted file mode 100644 index 6b06a288a67a..000000000000 --- a/doc/source/example-jax-from-centralized-to-federated.rst +++ /dev/null @@ -1,282 +0,0 @@ -Example: JAX - Run JAX Federated -================================ - -This tutorial will show you how to use Flower to build a federated version of an existing JAX workload. -We are using JAX to train a linear regression model on a scikit-learn dataset. -We will structure the example similar to our `PyTorch - From Centralized To Federated `_ walkthrough. -First, we build a centralized training approach based on the `Linear Regression with JAX `_ tutorial`.
-Then, we build upon the centralized training code to run the training in a federated fashion. - -Before we start building our JAX example, we need install the packages :code:`jax`, :code:`jaxlib`, :code:`scikit-learn`, and :code:`flwr`: - -.. code-block:: shell - - $ pip install jax jaxlib scikit-learn flwr - - -Linear Regression with JAX --------------------------- - -We begin with a brief description of the centralized training code based on a :code:`Linear Regression` model. -If you want a more in-depth explanation of what's going on then have a look at the official `JAX documentation `_. - -Let's create a new file called :code:`jax_training.py` with all the components required for a traditional (centralized) linear regression training. -First, the JAX packages :code:`jax` and :code:`jaxlib` need to be imported. In addition, we need to import :code:`sklearn` since we use :code:`make_regression` for the dataset and :code:`train_test_split` to split the dataset into a training and test set. -You can see that we do not yet import the :code:`flwr` package for federated learning. This will be done later. - -.. code-block:: python - - from typing import Dict, List, Tuple, Callable - import jax - import jax.numpy as jnp - from sklearn.datasets import make_regression - from sklearn.model_selection import train_test_split - - key = jax.random.PRNGKey(0) - -The :code:`load_data()` function loads the mentioned training and test sets. - -.. code-block:: python - - def load_data() -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray], List[np.ndarray]]: - # create our dataset and start with similar datasets for different clients - X, y = make_regression(n_features=3, random_state=0) - X, X_test, y, y_test = train_test_split(X, y) - return X, y, X_test, y_test - -The model architecture (a very simple :code:`Linear Regression` model) is defined in :code:`load_model()`. - -.. code-block:: python - - def load_model(model_shape) -> Dict: - # model weights - params = { - 'b' : jax.random.uniform(key), - 'w' : jax.random.uniform(key, model_shape) - } - return params - -We now need to define the training (function :code:`train()`), which loops over the training set and measures the loss (function :code:`loss_fn()`) for each batch of training examples. The loss function is separate since JAX takes derivatives with a :code:`grad()` function (defined in the :code:`main()` function and called in :code:`train()`). - -.. code-block:: python - - def loss_fn(params, X, y) -> Callable: - err = jnp.dot(X, params['w']) + params['b'] - y - return jnp.mean(jnp.square(err)) # mse - - def train(params, grad_fn, X, y) -> Tuple[np.array, float, int]: - num_examples = X.shape[0] - for epochs in range(10): - grads = grad_fn(params, X, y) - params = jax.tree_multimap(lambda p, g: p - 0.05 * g, params, grads) - loss = loss_fn(params,X, y) - # if epochs % 10 == 9: - # print(f'For Epoch {epochs} loss {loss}') - return params, loss, num_examples - -The evaluation of the model is defined in the function :code:`evaluation()`. The function takes all test examples and measures the loss of the linear regression model. - -.. 
code-block:: python - - def evaluation(params, grad_fn, X_test, y_test) -> Tuple[float, int]: - num_examples = X_test.shape[0] - err_test = loss_fn(params, X_test, y_test) - loss_test = jnp.mean(jnp.square(err_test)) - # print(f'Test loss {loss_test}') - return loss_test, num_examples - -Having defined the data loading, model architecture, training, and evaluation we can put everything together and train our model using JAX. As already mentioned, the :code:`jax.grad()` function is defined in :code:`main()` and passed to :code:`train()`. - -.. code-block:: python - - def main(): - X, y, X_test, y_test = load_data() - model_shape = X.shape[1:] - grad_fn = jax.grad(loss_fn) - print("Model Shape", model_shape) - params = load_model(model_shape) - params, loss, num_examples = train(params, grad_fn, X, y) - evaluation(params, grad_fn, X_test, y_test) - - - if __name__ == "__main__": - main() - -You can now run your (centralized) JAX linear regression workload: - -.. code-block:: python - - python3 jax_training.py - -So far this should all look fairly familiar if you've used JAX before. -Let's take the next step and use what we've built to create a simple federated learning system consisting of one server and two clients. - -JAX meets Flower ----------------- - -The concept of federating an existing workload is always the same and easy to understand. -We have to start a *server* and then use the code in :code:`jax_training.py` for the *clients* that are connected to the *server*. -The *server* sends model parameters to the clients. The *clients* run the training and update the parameters. -The updated parameters are sent back to the *server*, which averages all received parameter updates. -This describes one round of the federated learning process, and we repeat this for multiple rounds. - -Our example consists of one *server* and two *clients*. Let's set up :code:`server.py` first. The *server* needs to import the Flower package :code:`flwr`. -Next, we use the :code:`start_server` function to start a server and tell it to perform three rounds of federated learning. - -.. code-block:: python - - import flwr as fl - - if __name__ == "__main__": - fl.server.start_server(server_address="0.0.0.0:8080", config=fl.server.ServerConfig(num_rounds=3)) - -We can already start the *server*: - -.. code-block:: python - - python3 server.py - -Finally, we will define our *client* logic in :code:`client.py` and build upon the previously defined JAX training in :code:`jax_training.py`. -Our *client* needs to import :code:`flwr`, but also :code:`jax` and :code:`jaxlib` to update the parameters on our JAX model: - -.. code-block:: python - - from typing import Dict, List, Callable, Tuple - - import flwr as fl - import numpy as np - import jax - import jax.numpy as jnp - - import jax_training - - -Implementing a Flower *client* basically means implementing a subclass of either :code:`flwr.client.Client` or :code:`flwr.client.NumPyClient`. -Our implementation will be based on :code:`flwr.client.NumPyClient` and we'll call it :code:`FlowerClient`. -:code:`NumPyClient` is slightly easier to implement than :code:`Client` if you use a framework with good NumPy interoperability (like JAX) because it avoids some of the boilerplate that would otherwise be necessary. -:code:`FlowerClient` needs to implement four methods, two methods for getting/setting model parameters, one method for training the model, and one method for testing the model: - -#. 
:code:`set_parameters (optional)` - * set the model parameters on the local model that are received from the server - * transform parameters to NumPy :code:`ndarray`'s - * loop over the list of model parameters received as NumPy :code:`ndarray`'s (think list of neural network layers) -#. :code:`get_parameters` - * get the model parameters and return them as a list of NumPy :code:`ndarray`'s (which is what :code:`flwr.client.NumPyClient` expects) -#. :code:`fit` - * update the parameters of the local model with the parameters received from the server - * train the model on the local training set - * get the updated local model parameters and return them to the server -#. :code:`evaluate` - * update the parameters of the local model with the parameters received from the server - * evaluate the updated model on the local test set - * return the local loss to the server - -The challenging part is to transform the JAX model parameters from :code:`DeviceArray` to :code:`NumPy ndarray` to make them compatible with `NumPyClient`. - -The two :code:`NumPyClient` methods :code:`fit` and :code:`evaluate` make use of the functions :code:`train()` and :code:`evaluate()` previously defined in :code:`jax_training.py`. -So what we really do here is we tell Flower through our :code:`NumPyClient` subclass which of our already defined functions to call for training and evaluation. -We included type annotations to give you a better understanding of the data types that get passed around. - -.. code-block:: python - - - class FlowerClient(fl.client.NumPyClient): - """Flower client implementing using linear regression and JAX.""" - - def __init__( - self, - params: Dict, - grad_fn: Callable, - train_x: List[np.ndarray], - train_y: List[np.ndarray], - test_x: List[np.ndarray], - test_y: List[np.ndarray], - ) -> None: - self.params= params - self.grad_fn = grad_fn - self.train_x = train_x - self.train_y = train_y - self.test_x = test_x - self.test_y = test_y - - def get_parameters(self, config) -> Dict: - # Return model parameters as a list of NumPy ndarrays - parameter_value = [] - for _, val in self.params.items(): - parameter_value.append(np.array(val)) - return parameter_value - - def set_parameters(self, parameters: List[np.ndarray]) -> Dict: - # Collect model parameters and update the parameters of the local model - value=jnp.ndarray - params_item = list(zip(self.params.keys(),parameters)) - for item in params_item: - key = item[0] - value = item[1] - self.params[key] = value - return self.params - - - def fit( - self, parameters: List[np.ndarray], config: Dict - ) -> Tuple[List[np.ndarray], int, Dict]: - # Set model parameters, train model, return updated model parameters - print("Start local training") - self.params = self.set_parameters(parameters) - self.params, loss, num_examples = jax_training.train(self.params, self.grad_fn, self.train_x, self.train_y) - results = {"loss": float(loss)} - print("Training results", results) - return self.get_parameters(config={}), num_examples, results - - def evaluate( - self, parameters: List[np.ndarray], config: Dict - ) -> Tuple[float, int, Dict]: - # Set model parameters, evaluate the model on a local test dataset, return result - print("Start evaluation") - self.params = self.set_parameters(parameters) - loss, num_examples = jax_training.evaluation(self.params,self.grad_fn, self.test_x, self.test_y) - print("Evaluation accuracy & loss", loss) - return ( - float(loss), - num_examples, - {"loss": float(loss)}, - ) - -Having defined the federation process, we can run 
it. - -.. code-block:: python - - def main() -> None: - """Load data, start MNISTClient.""" - - # Load data - train_x, train_y, test_x, test_y = jax_training.load_data() - grad_fn = jax.grad(jax_training.loss_fn) - - # Load model (from centralized training) and initialize parameters - model_shape = train_x.shape[1:] - params = jax_training.load_model(model_shape) - - # Start Flower client - client = FlowerClient(params, grad_fn, train_x, train_y, test_x, test_y) - fl.client.start_client(server_address="0.0.0.0:8080", client.to_client()) - - if __name__ == "__main__": - main() - - -And that's it. You can now open two additional terminal windows and run - -.. code-block:: python - - python3 client.py - -in each window (make sure that the server is still running before you do so) and see your JAX project run federated learning across two clients. Congratulations! - -Next Steps ----------- - -The source code of this example was improved over time and can be found here: `Quickstart JAX `_. -Our example is somewhat over-simplified because both clients load the same dataset. - -You're now prepared to explore this topic further. How about using a more sophisticated model or using a different dataset? How about adding more clients? diff --git a/doc/source/index.rst b/doc/source/index.rst index 2a34693f7b26..4f6ad705e9bc 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -102,7 +102,6 @@ Problem-oriented how-to guides show step-by-step how to achieve a specific goal. :caption: Legacy example guides example-pytorch-from-centralized-to-federated - example-jax-from-centralized-to-federated example-fedbn-pytorch-from-centralized-to-federated Explanations diff --git a/doc/source/ref-changelog.md b/doc/source/ref-changelog.md index 531afb9ada52..7fcea7edc729 100644 --- a/doc/source/ref-changelog.md +++ b/doc/source/ref-changelog.md @@ -1,6 +1,108 @@ # Changelog -## Unreleased +## v1.11.0 (2024-08-30) + +### Thanks to our contributors + +We would like to give our special thanks to all the contributors who made the new version of Flower possible (in `git shortlog` order): + +`Adam Narozniak`, `Charles Beauville`, `Chong Shen Ng`, `Daniel J. Beutel`, `Daniel Nata Nugraha`, `Danny`, `Edoardo Gabrielli`, `Heng Pan`, `Javier`, `Meng Yan`, `Michal Danilowski`, `Mohammad Naseri`, `Robert Steiner`, `Steve Laskaridis`, `Taner Topal`, `Yan Gao` + +### What's new? 
+ +- **Deliver Flower App Bundle (FAB) to SuperLink and SuperNodes** ([#4006](https://github.com/adap/flower/pull/4006), [#3945](https://github.com/adap/flower/pull/3945), [#3999](https://github.com/adap/flower/pull/3999), [#4027](https://github.com/adap/flower/pull/4027), [#3851](https://github.com/adap/flower/pull/3851), [#3946](https://github.com/adap/flower/pull/3946), [#4003](https://github.com/adap/flower/pull/4003), [#4029](https://github.com/adap/flower/pull/4029), [#3942](https://github.com/adap/flower/pull/3942), [#3957](https://github.com/adap/flower/pull/3957), [#4020](https://github.com/adap/flower/pull/4020), [#4044](https://github.com/adap/flower/pull/4044), [#3852](https://github.com/adap/flower/pull/3852), [#4019](https://github.com/adap/flower/pull/4019), [#4031](https://github.com/adap/flower/pull/4031), [#4036](https://github.com/adap/flower/pull/4036), [#4049](https://github.com/adap/flower/pull/4049), [#4017](https://github.com/adap/flower/pull/4017), [#3943](https://github.com/adap/flower/pull/3943), [#3944](https://github.com/adap/flower/pull/3944), [#4011](https://github.com/adap/flower/pull/4011), [#3619](https://github.com/adap/flower/pull/3619)) + + Dynamic code updates are here! `flwr run` can now ship and install the latest version of your `ServerApp` and `ClientApp` to an already-running federation (SuperLink and SuperNodes). + + How does it work? `flwr run` bundles your Flower app into a single FAB (Flower App Bundle) file. It then ships this FAB file, via the SuperExec, to both the SuperLink and those SuperNodes that need it. This allows you to keep SuperExec, SuperLink and SuperNodes running as permanent infrastructure, and then ship code updates (including completely new projects!) dynamically. + + `flwr run` is all you need. + +- **Introduce isolated** `ClientApp` **execution** ([#3970](https://github.com/adap/flower/pull/3970), [#3976](https://github.com/adap/flower/pull/3976), [#4002](https://github.com/adap/flower/pull/4002), [#4001](https://github.com/adap/flower/pull/4001), [#4034](https://github.com/adap/flower/pull/4034), [#4037](https://github.com/adap/flower/pull/4037), [#3977](https://github.com/adap/flower/pull/3977), [#4042](https://github.com/adap/flower/pull/4042), [#3978](https://github.com/adap/flower/pull/3978), [#4039](https://github.com/adap/flower/pull/4039), [#4033](https://github.com/adap/flower/pull/4033), [#3971](https://github.com/adap/flower/pull/3971), [#4035](https://github.com/adap/flower/pull/4035), [#3973](https://github.com/adap/flower/pull/3973), [#4032](https://github.com/adap/flower/pull/4032)) + + The SuperNode can now run your `ClientApp` in a fully isolated way. In an enterprise deployment, this allows you to set strict limits on what the `ClientApp` can and cannot do. + + `flower-supernode` supports three `--isolation` modes: + + - Unset: The SuperNode runs the `ClientApp` in the same process (as in previous versions of Flower). This is the default mode. + - `--isolation=subprocess`: The SuperNode starts a subprocess to run the `ClientApp`. + - `--isolation=process`: The SuperNode expects an externally-managed process to run the `ClientApp`. This external process is not managed by the SuperNode, so it has to be started beforehand and terminated manually. The common way to use this isolation mode is via the new `flwr/clientapp` Docker image. 
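+
+  As a rough sketch of how the two non-default modes might be launched (the addresses and ports here are illustrative, borrowed from the Docker tutorials in this release):
+
+  ```bash
+  # Subprocess mode: the SuperNode itself spawns and manages the ClientApp
+  flower-supernode --insecure --superlink 127.0.0.1:9092 --isolation subprocess
+
+  # Process mode: the SuperNode only coordinates; start the ClientApp separately
+  flower-supernode --insecure --superlink 127.0.0.1:9092 \
+      --supernode-address 127.0.0.1:9094 --isolation process
+  flwr-clientapp --supernode 127.0.0.1:9094
+  ```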
+ +- **Improve Docker support for enterprise deployments** ([#4050](https://github.com/adap/flower/pull/4050), [#4090](https://github.com/adap/flower/pull/4090), [#3784](https://github.com/adap/flower/pull/3784), [#3998](https://github.com/adap/flower/pull/3998), [#4094](https://github.com/adap/flower/pull/4094), [#3722](https://github.com/adap/flower/pull/3722)) + + Flower 1.11 ships many Docker improvements that are especially useful for enterprise deployments: + + - `flwr/supernode` comes with a new Alpine Docker image. + - `flwr/clientapp` is a new image to be used with the `--isolation=process` option. In this mode, SuperNode and `ClientApp` run in two different Docker containers. `flwr/supernode` (preferably the Alpine version) runs the long-running SuperNode with `--isolation=process`. `flwr/clientapp` runs the `ClientApp`. This is the recommended way to deploy Flower in enterprise settings. + - New all-in-one Docker Compose enables you to easily start a full Flower Deployment Engine on a single machine. + - Completely new Docker documentation: https://flower.ai/docs/framework/docker/index.html + +- **Improve SuperNode authentication** ([#4043](https://github.com/adap/flower/pull/4043), [#4047](https://github.com/adap/flower/pull/4047), [#4074](https://github.com/adap/flower/pull/4074)) + + SuperNode auth has been improved in several ways, including improved logging, improved testing, and improved error handling. + +- **Update** `flwr new` **templates** ([#3933](https://github.com/adap/flower/pull/3933), [#3894](https://github.com/adap/flower/pull/3894), [#3930](https://github.com/adap/flower/pull/3930), [#3931](https://github.com/adap/flower/pull/3931), [#3997](https://github.com/adap/flower/pull/3997), [#3979](https://github.com/adap/flower/pull/3979), [#3965](https://github.com/adap/flower/pull/3965), [#4013](https://github.com/adap/flower/pull/4013), [#4064](https://github.com/adap/flower/pull/4064)) + + All `flwr new` templates have been updated to show the latest recommended use of Flower APIs. + +- **Improve Simulation Engine** ([#4095](https://github.com/adap/flower/pull/4095), [#3913](https://github.com/adap/flower/pull/3913), [#4059](https://github.com/adap/flower/pull/4059), [#3954](https://github.com/adap/flower/pull/3954), [#4071](https://github.com/adap/flower/pull/4071), [#3985](https://github.com/adap/flower/pull/3985), [#3988](https://github.com/adap/flower/pull/3988)) + + The Flower Simulation Engine comes with several updates, including improved run config support, verbose logging, simulation backend configuration via `flwr run`, and more. + +- **Improve** `RecordSet` ([#4052](https://github.com/adap/flower/pull/4052), [#3218](https://github.com/adap/flower/pull/3218), [#4016](https://github.com/adap/flower/pull/4016)) + + `RecordSet` is the core object to exchange model parameters, configuration values and metrics between `ClientApp` and `ServerApp`. This release ships several smaller improvements to `RecordSet` and related `*Record` types. 
+ +- **Update documentation** ([#3972](https://github.com/adap/flower/pull/3972), [#3925](https://github.com/adap/flower/pull/3925), [#4061](https://github.com/adap/flower/pull/4061), [#3984](https://github.com/adap/flower/pull/3984), [#3917](https://github.com/adap/flower/pull/3917), [#3900](https://github.com/adap/flower/pull/3900), [#4066](https://github.com/adap/flower/pull/4066), [#3765](https://github.com/adap/flower/pull/3765), [#4021](https://github.com/adap/flower/pull/4021), [#3906](https://github.com/adap/flower/pull/3906), [#4063](https://github.com/adap/flower/pull/4063), [#4076](https://github.com/adap/flower/pull/4076), [#3920](https://github.com/adap/flower/pull/3920), [#3916](https://github.com/adap/flower/pull/3916)) + + Many parts of the documentation, including the main tutorial, have been migrated to show new Flower APIs and other new Flower features like the improved Docker support. + +- **Migrate code example to use new Flower APIs** ([#3758](https://github.com/adap/flower/pull/3758), [#3701](https://github.com/adap/flower/pull/3701), [#3919](https://github.com/adap/flower/pull/3919), [#3918](https://github.com/adap/flower/pull/3918), [#3934](https://github.com/adap/flower/pull/3934), [#3893](https://github.com/adap/flower/pull/3893), [#3833](https://github.com/adap/flower/pull/3833), [#3922](https://github.com/adap/flower/pull/3922), [#3846](https://github.com/adap/flower/pull/3846), [#3777](https://github.com/adap/flower/pull/3777), [#3874](https://github.com/adap/flower/pull/3874), [#3873](https://github.com/adap/flower/pull/3873), [#3935](https://github.com/adap/flower/pull/3935), [#3754](https://github.com/adap/flower/pull/3754), [#3980](https://github.com/adap/flower/pull/3980), [#4089](https://github.com/adap/flower/pull/4089), [#4046](https://github.com/adap/flower/pull/4046), [#3314](https://github.com/adap/flower/pull/3314), [#3316](https://github.com/adap/flower/pull/3316), [#3295](https://github.com/adap/flower/pull/3295), [#3313](https://github.com/adap/flower/pull/3313)) + + Many code examples have been migrated to use new Flower APIs. 
+ +- **Update Flower framework, framework internals and quality infrastructure** ([#4018](https://github.com/adap/flower/pull/4018), [#4053](https://github.com/adap/flower/pull/4053), [#4098](https://github.com/adap/flower/pull/4098), [#4067](https://github.com/adap/flower/pull/4067), [#4105](https://github.com/adap/flower/pull/4105), [#4048](https://github.com/adap/flower/pull/4048), [#4107](https://github.com/adap/flower/pull/4107), [#4069](https://github.com/adap/flower/pull/4069), [#3915](https://github.com/adap/flower/pull/3915), [#4101](https://github.com/adap/flower/pull/4101), [#4108](https://github.com/adap/flower/pull/4108), [#3914](https://github.com/adap/flower/pull/3914), [#4068](https://github.com/adap/flower/pull/4068), [#4041](https://github.com/adap/flower/pull/4041), [#4040](https://github.com/adap/flower/pull/4040), [#3986](https://github.com/adap/flower/pull/3986), [#4026](https://github.com/adap/flower/pull/4026), [#3961](https://github.com/adap/flower/pull/3961), [#3975](https://github.com/adap/flower/pull/3975), [#3983](https://github.com/adap/flower/pull/3983), [#4091](https://github.com/adap/flower/pull/4091), [#3982](https://github.com/adap/flower/pull/3982), [#4079](https://github.com/adap/flower/pull/4079), [#4073](https://github.com/adap/flower/pull/4073), [#4060](https://github.com/adap/flower/pull/4060), [#4106](https://github.com/adap/flower/pull/4106), [#4080](https://github.com/adap/flower/pull/4080), [#3974](https://github.com/adap/flower/pull/3974), [#3996](https://github.com/adap/flower/pull/3996), [#3991](https://github.com/adap/flower/pull/3991), [#3981](https://github.com/adap/flower/pull/3981), [#4093](https://github.com/adap/flower/pull/4093), [#4100](https://github.com/adap/flower/pull/4100), [#3939](https://github.com/adap/flower/pull/3939), [#3955](https://github.com/adap/flower/pull/3955), [#3940](https://github.com/adap/flower/pull/3940), [#4038](https://github.com/adap/flower/pull/4038)) + + As always, many parts of the Flower framework and quality infrastructure were improved and updated. + +### Deprecations + +- **Deprecate accessing `Context` via `Client.context`** ([#3797](https://github.com/adap/flower/pull/3797)) + + Now that both `client_fn` and `server_fn` receive a `Context` object, accessing `Context` via `Client.context` is deprecated. `Client.context` will be removed in a future release. If you need to access `Context` in your `Client` implementation, pass it manually when creating the `Client` instance in `client_fn`: + + ```python + def client_fn(context: Context) -> Client: + return FlowerClient(context).to_client() + ``` + +### Incompatible changes + +- **Update CLIs to accept an app directory instead of** `ClientApp` **and** `ServerApp` ([#3952](https://github.com/adap/flower/pull/3952), [#4077](https://github.com/adap/flower/pull/4077), [#3850](https://github.com/adap/flower/pull/3850)) + + The CLI commands `flower-supernode` and `flower-server-app` now accept an app directory as argument (instead of references to a `ClientApp` or `ServerApp`). An app directory is any directory containing a `pyproject.toml` file (with the appropriate Flower config fields set). The easiest way to generate a compatible project structure is to use `flwr new`. + +- **Disable** `flower-client-app` **CLI command** ([#4022](https://github.com/adap/flower/pull/4022)) + + `flower-client-app` has been disabled. Use `flower-supernode` instead. 
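+
+  For reference, a migration sketch (the app module, directory, and addresses below are placeholders, not a prescription):
+
+  ```bash
+  # Before (no longer works): reference a ClientApp object directly
+  flower-client-app myproject.client:app --insecure --superlink 127.0.0.1:9092
+
+  # After: pass an app directory containing a pyproject.toml
+  flower-supernode ./myproject --insecure --superlink 127.0.0.1:9092
+  ```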
+ +- **Use spaces instead of commas for separating config args** ([#4000](https://github.com/adap/flower/pull/4000)) + + When passing configs (run config, node config) to Flower, you now need to separate key-value pairs using spaces instead of commas. For example: + + ```bash + flwr run . --run-config "learning-rate=0.01 num_rounds=10" # Works + ``` + + Previously, you could pass configs using commas, like this: + + ```bash + flwr run . --run-config "learning-rate=0.01,num_rounds=10" # Doesn't work + ``` + +- **Remove** `flwr example` **CLI command** ([#4084](https://github.com/adap/flower/pull/4084)) + + The experimental `flwr example` CLI command has been removed. Use `flwr new` to generate a project and then run it using `flwr run`. ## v1.10.0 (2024-07-24) diff --git a/doc/source/tutorial-quickstart-mlx.rst b/doc/source/tutorial-quickstart-mlx.rst index 0999bf44d3b7..675a08502d26 100644 --- a/doc/source/tutorial-quickstart-mlx.rst +++ b/doc/source/tutorial-quickstart-mlx.rst @@ -109,7 +109,7 @@ You can also override the parameters defined in .. code:: shell # Override some arguments - $ flwr run . --run-config num-server-rounds=5,lr=0.05 + $ flwr run . --run-config "num-server-rounds=5 lr=0.05" What follows is an explanation of each component in the project you just created: dataset partition, the model, defining the ``ClientApp`` and diff --git a/doc/source/tutorial-quickstart-pytorch.rst b/doc/source/tutorial-quickstart-pytorch.rst index 4515e8d0eeb5..d00b9efbe16b 100644 --- a/doc/source/tutorial-quickstart-pytorch.rst +++ b/doc/source/tutorial-quickstart-pytorch.rst @@ -108,7 +108,7 @@ You can also override the parameters defined in the .. code:: shell # Override some arguments - $ flwr run . --run-config num-server-rounds=5,local-epochs=3 + $ flwr run . 
--run-config "num-server-rounds=5 local-epochs=3" What follows is an explanation of each component in the project you just created: dataset partition, the model, defining the ``ClientApp`` and diff --git a/e2e/test_superlink.sh b/e2e/test_superlink.sh index 684f386bd388..2016f6da1933 100755 --- a/e2e/test_superlink.sh +++ b/e2e/test_superlink.sh @@ -2,7 +2,7 @@ set -e case "$1" in - e2e-bare-https) + e2e-bare-https | e2e-bare-auth) ./generate.sh server_arg="--ssl-ca-certfile certificates/ca.crt --ssl-certfile certificates/server.pem --ssl-keyfile certificates/server.key" client_arg="--root-certificates certificates/ca.crt" @@ -37,14 +37,11 @@ case "$2" in client_auth_2="" ;; client-auth) - ./generate.sh rest_arg_superlink="" rest_arg_supernode="" server_address="127.0.0.1:9092" server_app_address="127.0.0.1:9091" db_arg="--database :flwr-in-memory-state:" - server_arg="--ssl-ca-certfile certificates/ca.crt --ssl-certfile certificates/server.pem --ssl-keyfile certificates/server.key" - client_arg="--root-certificates certificates/ca.crt" server_auth="--auth-list-public-keys keys/client_public_keys.csv --auth-superlink-private-key keys/server_credentials --auth-superlink-public-key keys/server_credentials.pub" client_auth_1="--auth-supernode-private-key keys/client_credentials_1 --auth-supernode-public-key keys/client_credentials_1.pub" client_auth_2="--auth-supernode-private-key keys/client_credentials_2 --auth-supernode-public-key keys/client_credentials_2.pub" diff --git a/examples/doc/source/conf.py b/examples/doc/source/conf.py index 4e4b7b210051..3500d7f0b59c 100644 --- a/examples/doc/source/conf.py +++ b/examples/doc/source/conf.py @@ -29,7 +29,7 @@ author = "The Flower Authors" # The full version, including alpha/beta/rc tags -release = "1.11.0" +release = "1.12.0" # -- General configuration --------------------------------------------------- diff --git a/examples/federated-kaplan-meier-fitter/README.md b/examples/federated-kaplan-meier-fitter/README.md index 1964ec4e5653..cc68a331bbba 100644 --- a/examples/federated-kaplan-meier-fitter/README.md +++ b/examples/federated-kaplan-meier-fitter/README.md @@ -69,7 +69,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,learning-rate=0.05 +flwr run . --run-config "num-server-rounds=5 learning-rate=0.05" ``` You can also check that the results match the centralized version. 
diff --git a/examples/federated-kaplan-meier-fitter/pyproject.toml b/examples/federated-kaplan-meier-fitter/pyproject.toml index 47cb0a4ba286..159ccc15efe4 100644 --- a/examples/federated-kaplan-meier-fitter/pyproject.toml +++ b/examples/federated-kaplan-meier-fitter/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Federated Kaplan Meier Fitter with Flower" license = "Apache-2.0" dependencies = [ - "flwr[simulation]>=1.10.0", + "flwr[simulation]>=1.11.0", "flwr-datasets>=0.3.0", "numpy>=1.23.2", "pandas>=2.0.0", diff --git a/examples/fl-dp-sa/README.md b/examples/fl-dp-sa/README.md index 65c8a5b18fa8..61a6c80f3556 100644 --- a/examples/fl-dp-sa/README.md +++ b/examples/fl-dp-sa/README.md @@ -1,28 +1,63 @@ --- -tags: [basic, vision, fds] +tags: [DP, SecAgg, vision, fds] dataset: [MNIST] framework: [torch, torchvision] --- -# Example of Flower App with DP and SA +# Flower Example on MNIST with Differential Privacy and Secure Aggregation -This is a simple example that utilizes central differential privacy with client-side fixed clipping and secure aggregation. -Note: This example is designed for a small number of rounds and is intended for demonstration purposes. +This example demonstrates a federated learning setup using Flower, incorporating central differential privacy (DP) with client-side fixed clipping and secure aggregation (SA). It is intended for a small number of rounds for demonstration purposes. -## Install dependencies +This example is similar to the [quickstart-pytorch example](https://github.com/adap/flower/tree/main/examples/quickstart-pytorch) and extends it by integrating central differential privacy and secure aggregation. For more details on differential privacy and secure aggregation in Flower, please refer to the documentation [here](https://flower.ai/docs/framework/how-to-use-differential-privacy.html) and [here](https://flower.ai/docs/framework/contributor-ref-secure-aggregation-protocols.html). -```bash -# Using pip -pip install . +## Set up the project + +### Clone the project + +Start by cloning the example project: + +```shell +git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/fl-dp-sa . && rm -rf flower && cd fl-dp-sa +``` + +This will create a new directory called `fl-dp-sa` containing the following files: -# Or using Poetry -poetry install +```shell +fl-dp-sa +├── fl_dp_sa +│ ├── client_app.py # Defines your ClientApp +│ ├── server_app.py # Defines your ServerApp +│ └── task.py # Defines your model, training, and data loading +├── pyproject.toml # Project metadata like dependencies and configs +└── README.md ``` -## Run +### Install dependencies and project -The example uses the MNIST dataset with a total of 100 clients, with 20 clients sampled in each round. The hyperparameters for DP and SecAgg are specified in `server.py`. +Install the dependencies defined in `pyproject.toml` as well as the `fl_dp_sa` package. ```shell -flower-simulation --server-app fl_dp_sa.server:app --client-app fl_dp_sa.client:app --num-supernodes 100 +# From a new python environment, run: +pip install -e . +``` + +## Run the project + +You can run your Flower project in both _simulation_ and _deployment_ mode without making changes to the code. If you are starting with Flower, we recommend using the _simulation_ mode as it requires fewer components to be launched manually. By default, `flwr run` will make use of the Simulation Engine. + +### Run with the Simulation Engine + +```bash +flwr run . 
+``` + +You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: + +```bash +flwr run . --run-config "noise-multiplier=0.1 clipping-norm=5" ``` + +### Run with the Deployment Engine + +> \[!NOTE\] +> An update to this example will show how to run this Flower project with the Deployment Engine and TLS certificates, or with Docker. diff --git a/examples/fl-dp-sa/fl_dp_sa/__init__.py b/examples/fl-dp-sa/fl_dp_sa/__init__.py index 741260348ab8..c5c9a7e9581c 100644 --- a/examples/fl-dp-sa/fl_dp_sa/__init__.py +++ b/examples/fl-dp-sa/fl_dp_sa/__init__.py @@ -1 +1 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" diff --git a/examples/fl-dp-sa/fl_dp_sa/client.py b/examples/fl-dp-sa/fl_dp_sa/client.py deleted file mode 100644 index b3b02c6e9d61..000000000000 --- a/examples/fl-dp-sa/fl_dp_sa/client.py +++ /dev/null @@ -1,42 +0,0 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" - -from flwr.client import ClientApp, NumPyClient -from flwr.client.mod import fixedclipping_mod, secaggplus_mod - -from fl_dp_sa.task import DEVICE, Net, get_weights, load_data, set_weights, test, train - -# Load model and data (simple CNN, CIFAR-10) -net = Net().to(DEVICE) - - -# Define FlowerClient and client_fn -class FlowerClient(NumPyClient): - def __init__(self, trainloader, testloader) -> None: - self.trainloader = trainloader - self.testloader = testloader - - def fit(self, parameters, config): - set_weights(net, parameters) - results = train(net, self.trainloader, self.testloader, epochs=1, device=DEVICE) - return get_weights(net), len(self.trainloader.dataset), results - - def evaluate(self, parameters, config): - set_weights(net, parameters) - loss, accuracy = test(net, self.testloader) - return loss, len(self.testloader.dataset), {"accuracy": accuracy} - - -def client_fn(cid: str): - """Create and return an instance of Flower `Client`.""" - trainloader, testloader = load_data(partition_id=int(cid)) - return FlowerClient(trainloader, testloader).to_client() - - -# Flower ClientApp -app = ClientApp( - client_fn=client_fn, - mods=[ - secaggplus_mod, - fixedclipping_mod, - ], -) diff --git a/examples/fl-dp-sa/fl_dp_sa/client_app.py b/examples/fl-dp-sa/fl_dp_sa/client_app.py new file mode 100644 index 000000000000..5630d4f4d14f --- /dev/null +++ b/examples/fl-dp-sa/fl_dp_sa/client_app.py @@ -0,0 +1,50 @@ +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" + +import torch +from flwr.client import ClientApp, NumPyClient +from flwr.common import Context +from flwr.client.mod import fixedclipping_mod, secaggplus_mod + +from fl_dp_sa.task import Net, get_weights, load_data, set_weights, test, train + + +class FlowerClient(NumPyClient): + def __init__(self, trainloader, testloader) -> None: + self.net = Net() + self.trainloader = trainloader + self.testloader = testloader + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + def fit(self, parameters, config): + set_weights(self.net, parameters) + results = train( + self.net, + self.trainloader, + self.testloader, + epochs=1, + device=self.device, + ) + return get_weights(self.net), len(self.trainloader.dataset), results + + def evaluate(self, parameters, config): + set_weights(self.net, parameters) + loss, accuracy = test(self.net, self.testloader, self.device) + return loss, len(self.testloader.dataset), {"accuracy": accuracy} + + +def client_fn(context: Context): + partition_id 
= context.node_config["partition-id"] + trainloader, testloader = load_data( + partition_id=partition_id, num_partitions=context.node_config["num-partitions"] + ) + return FlowerClient(trainloader, testloader).to_client() + + +# Flower ClientApp +app = ClientApp( + client_fn=client_fn, + mods=[ + secaggplus_mod, + fixedclipping_mod, + ], +) diff --git a/examples/fl-dp-sa/fl_dp_sa/server.py b/examples/fl-dp-sa/fl_dp_sa/server_app.py similarity index 56% rename from examples/fl-dp-sa/fl_dp_sa/server.py rename to examples/fl-dp-sa/fl_dp_sa/server_app.py index 3ec0ba757b0d..1704b4942ff8 100644 --- a/examples/fl-dp-sa/fl_dp_sa/server.py +++ b/examples/fl-dp-sa/fl_dp_sa/server_app.py @@ -1,20 +1,22 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" from typing import List, Tuple from flwr.common import Context, Metrics, ndarrays_to_parameters -from flwr.server import Driver, LegacyContext, ServerApp, ServerConfig +from flwr.server import ( + Driver, + LegacyContext, + ServerApp, + ServerConfig, +) from flwr.server.strategy import DifferentialPrivacyClientSideFixedClipping, FedAvg from flwr.server.workflow import DefaultWorkflow, SecAggPlusWorkflow from fl_dp_sa.task import Net, get_weights -# Define metric aggregation function def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: examples = [num_examples for num_examples, _ in metrics] - - # Multiply accuracy of each client by number of examples used train_losses = [num_examples * m["train_loss"] for num_examples, m in metrics] train_accuracies = [ num_examples * m["train_accuracy"] for num_examples, m in metrics @@ -22,7 +24,6 @@ def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: val_losses = [num_examples * m["val_loss"] for num_examples, m in metrics] val_accuracies = [num_examples * m["val_accuracy"] for num_examples, m in metrics] - # Aggregate and return custom metric (weighted average) return { "train_loss": sum(train_losses) / sum(examples), "train_accuracy": sum(train_accuracies) / sum(examples), @@ -31,30 +32,36 @@ def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics: } -# Initialize model parameters -ndarrays = get_weights(Net()) -parameters = ndarrays_to_parameters(ndarrays) +app = ServerApp() -# Define strategy -strategy = FedAvg( - fraction_fit=0.2, - fraction_evaluate=0.0, # Disable evaluation for demo purpose - min_fit_clients=20, - min_available_clients=20, - fit_metrics_aggregation_fn=weighted_average, - initial_parameters=parameters, -) -strategy = DifferentialPrivacyClientSideFixedClipping( - strategy, noise_multiplier=0.2, clipping_norm=10, num_sampled_clients=20 -) +@app.main() +def main(driver: Driver, context: Context) -> None: + # Initialize global model + model_weights = get_weights(Net()) + parameters = ndarrays_to_parameters(model_weights) + + # Note: The fraction_fit value is configured based on the DP hyperparameter `num-sampled-clients`. 
+ strategy = FedAvg( + fraction_fit=0.2, + fraction_evaluate=0.0, + min_fit_clients=20, + fit_metrics_aggregation_fn=weighted_average, + initial_parameters=parameters, + ) -app = ServerApp() + noise_multiplier = context.run_config["noise-multiplier"] + clipping_norm = context.run_config["clipping-norm"] + num_sampled_clients = context.run_config["num-sampled-clients"] + strategy = DifferentialPrivacyClientSideFixedClipping( + strategy, + noise_multiplier=noise_multiplier, + clipping_norm=clipping_norm, + num_sampled_clients=num_sampled_clients, + ) -@app.main() -def main(driver: Driver, context: Context) -> None: # Construct the LegacyContext context = LegacyContext( context=context, @@ -65,8 +72,8 @@ def main(driver: Driver, context: Context) -> None: # Create the train/evaluate workflow workflow = DefaultWorkflow( fit_workflow=SecAggPlusWorkflow( - num_shares=7, - reconstruction_threshold=4, + num_shares=context.run_config["num-shares"], + reconstruction_threshold=context.run_config["reconstruction-threshold"], ) ) diff --git a/examples/fl-dp-sa/fl_dp_sa/task.py b/examples/fl-dp-sa/fl_dp_sa/task.py index 5b4fd7dee592..c145cebe1378 100644 --- a/examples/fl-dp-sa/fl_dp_sa/task.py +++ b/examples/fl-dp-sa/fl_dp_sa/task.py @@ -1,24 +1,22 @@ -"""fl_dp_sa: A Flower / PyTorch app.""" +"""fl_dp_sa: Flower Example using Differential Privacy and Secure Aggregation.""" from collections import OrderedDict -from logging import INFO import torch import torch.nn as nn import torch.nn.functional as F -from flwr.common.logger import log from flwr_datasets import FederatedDataset +from flwr_datasets.partitioner import IidPartitioner from torch.utils.data import DataLoader from torchvision.transforms import Compose, Normalize, ToTensor -DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +fds = None # Cache FederatedDataset -class Net(nn.Module): - """Model.""" +class Net(nn.Module): def __init__(self) -> None: - super(Net, self).__init__() + super().__init__() self.conv1 = nn.Conv2d(1, 6, 3, padding=1) self.pool = nn.MaxPool2d(2, 2) self.conv2 = nn.Conv2d(6, 16, 5) @@ -36,9 +34,16 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return self.fc3(x) -def load_data(partition_id): +def load_data(partition_id: int, num_partitions: int): """Load partition MNIST data.""" - fds = FederatedDataset(dataset="mnist", partitioners={"train": 100}) + + global fds + if fds is None: + partitioner = IidPartitioner(num_partitions=num_partitions) + fds = FederatedDataset( + dataset="ylecun/mnist", + partitioners={"train": partitioner}, + ) partition = fds.load_partition(partition_id) # Divide data on each node: 80% train, 20% test partition_train_test = partition.train_test_split(test_size=0.2, seed=42) @@ -70,8 +75,8 @@ def train(net, trainloader, valloader, epochs, device): loss.backward() optimizer.step() - train_loss, train_acc = test(net, trainloader) - val_loss, val_acc = test(net, valloader) + train_loss, train_acc = test(net, trainloader, device) + val_loss, val_acc = test(net, valloader, device) results = { "train_loss": train_loss, @@ -82,17 +87,17 @@ def train(net, trainloader, valloader, epochs, device): return results -def test(net, testloader): +def test(net, testloader, device): """Validate the model on the test set.""" - net.to(DEVICE) + net.to(device) criterion = torch.nn.CrossEntropyLoss() correct, loss = 0, 0.0 with torch.no_grad(): for batch in testloader: - images = batch["image"].to(DEVICE) - labels = batch["label"].to(DEVICE) - outputs = net(images.to(DEVICE)) - labels = 
labels.to(DEVICE) + images = batch["image"].to(device) + labels = batch["label"].to(device) + outputs = net(images.to(device)) + labels = labels.to(device) loss += criterion(outputs, labels).item() correct += (torch.max(outputs.data, 1)[1] == labels).sum().item() accuracy = correct / len(testloader.dataset) diff --git a/examples/fl-dp-sa/flower.toml b/examples/fl-dp-sa/flower.toml deleted file mode 100644 index ea2e98206791..000000000000 --- a/examples/fl-dp-sa/flower.toml +++ /dev/null @@ -1,13 +0,0 @@ -[project] -name = "fl_dp_sa" -version = "1.0.0" -description = "" -license = "Apache-2.0" -authors = [ - "The Flower Authors ", -] -readme = "README.md" - -[flower.components] -serverapp = "fl_dp_sa.server:app" -clientapp = "fl_dp_sa.client:app" diff --git a/examples/fl-dp-sa/pyproject.toml b/examples/fl-dp-sa/pyproject.toml index 1ca343b072d9..fbb463cc1c05 100644 --- a/examples/fl-dp-sa/pyproject.toml +++ b/examples/fl-dp-sa/pyproject.toml @@ -1,21 +1,40 @@ [build-system] -requires = ["poetry-core>=1.4.0"] -build-backend = "poetry.core.masonry.api" +requires = ["hatchling"] +build-backend = "hatchling.build" -[tool.poetry] +[project] name = "fl-dp-sa" -version = "0.1.0" -description = "" +version = "1.0.0" +description = "Central Differential Privacy and Secure Aggregation in Flower" license = "Apache-2.0" -authors = [ - "The Flower Authors ", +dependencies = [ + "flwr[simulation]>=1.11.0", + "flwr-datasets[vision]>=0.3.0", + "torch==2.2.1", + "torchvision==0.17.1", ] -readme = "README.md" -[tool.poetry.dependencies] -python = "^3.9" -# Mandatory dependencies -flwr = { version = "^1.8.0", extras = ["simulation"] } -flwr-datasets = { version = "0.0.2", extras = ["vision"] } -torch = "2.2.1" -torchvision = "0.17.1" +[tool.hatch.build.targets.wheel] +packages = ["."] + +[tool.flwr.app] +publisher = "flwrlabs" + +[tool.flwr.app.components] +serverapp = "fl_dp_sa.server_app:app" +clientapp = "fl_dp_sa.client_app:app" + +[tool.flwr.app.config] +# Parameters for the DP +noise-multiplier = 0.2 +clipping-norm = 10 +num-sampled-clients = 20 +# Parameters for the SecAgg+ protocol +num-shares = 7 +reconstruction-threshold = 4 + +[tool.flwr.federations] +default = "local-simulation" + +[tool.flwr.federations.local-simulation] +options.num-supernodes = 100 \ No newline at end of file diff --git a/examples/fl-dp-sa/requirements.txt b/examples/fl-dp-sa/requirements.txt deleted file mode 100644 index f20b9d71e339..000000000000 --- a/examples/fl-dp-sa/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -flwr[simulation]>=1.8.0 -flwr-datasets[vision]==0.0.2 -torch==2.2.1 -torchvision==0.17.1 diff --git a/examples/flower-secure-aggregation/README.md b/examples/flower-secure-aggregation/README.md index 9e92aed01d9e..0a9056263db3 100644 --- a/examples/flower-secure-aggregation/README.md +++ b/examples/flower-secure-aggregation/README.md @@ -57,7 +57,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example ```bash -flwr run . --run-config num-server-rounds=5,learning-rate=0.25 +flwr run . --run-config "num-server-rounds=5 learning-rate=0.25" ``` To adapt the example for practical usage, set `is-demo=false` as shown below. You might want to adjust the `num-shares` and `reconstruction-threshold` settings to suit your requirements. You can override those via `--run-config` as well.
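+
+One way to apply all three overrides in a single run, using the space-separated `--run-config` syntax introduced in Flower 1.11 (the values below are purely illustrative):
+
+```bash
+flwr run . --run-config "is-demo=false num-shares=5 reconstruction-threshold=3"
+```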
diff --git a/examples/flower-secure-aggregation/pyproject.toml b/examples/flower-secure-aggregation/pyproject.toml index d9be719653b0..6ac94253e839 100644 --- a/examples/flower-secure-aggregation/pyproject.toml +++ b/examples/flower-secure-aggregation/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Secure Aggregation in Flower" license = "Apache-2.0" dependencies = [ - "flwr[simulation]>=1.10.0", + "flwr[simulation]>=1.11.0", "flwr-datasets[vision]>=0.3.0", "torch==2.2.1", "torchvision==0.17.1", diff --git a/examples/flowertune-llm/pyproject.toml b/examples/flowertune-llm/pyproject.toml index 8171d7680620..20aa7267d9d5 100644 --- a/examples/flowertune-llm/pyproject.toml +++ b/examples/flowertune-llm/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "FlowerTune LLM: Federated LLM Fine-tuning with Flower" license = "Apache-2.0" dependencies = [ - "flwr-nightly[simulation]==1.11.0.dev20240826", + "flwr[simulation]==1.11.0", "flwr-datasets>=0.3.0", "trl==0.8.1", "bitsandbytes==0.43.0", diff --git a/examples/flowertune-vit/README.md b/examples/flowertune-vit/README.md index 9e2b0fd6b079..48327880f412 100644 --- a/examples/flowertune-vit/README.md +++ b/examples/flowertune-vit/README.md @@ -59,7 +59,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,batch-size=64 +flwr run . --run-config "num-server-rounds=5 batch-size=64" ``` Run the project in the `local-simulation-gpu` federation that gives CPU and GPU resources to each `ClientApp`. By default, at most 5x`ClientApp` will run in parallel in the available GPU. You can tweak the degree of parallelism by adjusting the settings of this federation in the `pyproject.toml`. 
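+
+For example, a single command that targets the GPU federation and overrides the batch size might look like this (assuming the `local-simulation-gpu` federation is defined in the example's `pyproject.toml`):
+
+```bash
+flwr run . local-simulation-gpu --run-config "batch-size=64"
+```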
diff --git a/examples/flowertune-vit/pyproject.toml b/examples/flowertune-vit/pyproject.toml index 0f11dc54c81a..d0feabc14212 100644 --- a/examples/flowertune-vit/pyproject.toml +++ b/examples/flowertune-vit/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Federated Finetuning of a Vision Transformer with Flower" license = "Apache-2.0" dependencies = [ - "flwr-nightly[simulation]==1.11.0.dev20240823", + "flwr[simulation]==1.11.0", "flwr-datasets[vision]>=0.3.0", "torch==2.2.1", "torchvision==0.17.1", diff --git a/examples/quickstart-fastai/pyproject.toml b/examples/quickstart-fastai/pyproject.toml index 4d160bae0eec..25219ffcac4c 100644 --- a/examples/quickstart-fastai/pyproject.toml +++ b/examples/quickstart-fastai/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Federated Learning with Fastai and Flower (Quickstart Example)" license = "Apache-2.0" dependencies = [ - "flwr[simulation]>=1.10.0", + "flwr[simulation]>=1.11.0", "flwr-datasets[vision]>=0.3.0", "fastai==2.7.14", "torch==2.2.0", diff --git a/examples/quickstart-huggingface/pyproject.toml b/examples/quickstart-huggingface/pyproject.toml index af48b2429635..696f05b33ebf 100644 --- a/examples/quickstart-huggingface/pyproject.toml +++ b/examples/quickstart-huggingface/pyproject.toml @@ -12,7 +12,7 @@ authors = [ { name = "Kaushik Amar Das", email = "kaushik.das@iiitg.ac.in" }, ] dependencies = [ - "flwr-nightly[simulation]==1.11.0.dev20240823", + "flwr[simulation]==1.11.0", "flwr-datasets>=0.3.0", "torch==2.4.0", "transformers>=4.30.0,<5.0", diff --git a/examples/quickstart-mlx/README.md b/examples/quickstart-mlx/README.md index 95b9ccf605b5..ef28c3728279 100644 --- a/examples/quickstart-mlx/README.md +++ b/examples/quickstart-mlx/README.md @@ -58,7 +58,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,learning-rate=0.05 +flwr run . --run-config "num-server-rounds=5 learning-rate=0.05" ``` > \[!TIP\] diff --git a/examples/quickstart-mlx/pyproject.toml b/examples/quickstart-mlx/pyproject.toml index 36e39bcd6d78..459cac86f5d6 100644 --- a/examples/quickstart-mlx/pyproject.toml +++ b/examples/quickstart-mlx/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Federated Learning with MLX and Flower (Quickstart Example)" license = "Apache-2.0" dependencies = [ - "flwr[simulation]>=1.10.0", + "flwr[simulation]>=1.11.0", "flwr-datasets[vision]>=0.3.0", "mlx==0.16.0", "numpy==1.26.4", diff --git a/examples/quickstart-monai/README.md b/examples/quickstart-monai/README.md index c470a6a6c86f..8189a8e98406 100644 --- a/examples/quickstart-monai/README.md +++ b/examples/quickstart-monai/README.md @@ -70,7 +70,7 @@ flwr run . local-simulation-gpu You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,batch-size=32 +flwr run . 
--run-config "num-server-rounds=5 batch-size=32" ``` ### Run with the Deployment Engine diff --git a/examples/quickstart-monai/pyproject.toml b/examples/quickstart-monai/pyproject.toml index 6ecf5011d24f..daa92fc0387d 100644 --- a/examples/quickstart-monai/pyproject.toml +++ b/examples/quickstart-monai/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Federated Learning with MONAI and Flower (Quickstart Example)" license = "Apache-2.0" dependencies = [ - "flwr-nightly[simulation]==1.11.0.dev20240823", + "flwr[simulation]==1.11.0", "flwr-datasets[vision]>=0.3.0", "monai==1.3.2", "filelock==3.15.4", diff --git a/examples/quickstart-pytorch-lightning/README.md b/examples/quickstart-pytorch-lightning/README.md index e520be856962..0aa34db9af75 100644 --- a/examples/quickstart-pytorch-lightning/README.md +++ b/examples/quickstart-pytorch-lightning/README.md @@ -52,7 +52,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,max-epochs=2 +flwr run . --run-config "num-server-rounds=5 max-epochs=2" ``` ### Run with the Deployment Engine diff --git a/examples/quickstart-pytorch-lightning/pyproject.toml b/examples/quickstart-pytorch-lightning/pyproject.toml index 482fc1356527..c5537ac6fcbe 100644 --- a/examples/quickstart-pytorch-lightning/pyproject.toml +++ b/examples/quickstart-pytorch-lightning/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Federated Learning with PyTorch Lightning and Flower (Quickstart Example)" license = "Apache-2.0" dependencies = [ - "flwr[simulation]>=1.10.0", + "flwr[simulation]>=1.11.0", "flwr-datasets[vision]>=0.3.0", "pytorch-lightning<2.0.0; sys_platform == 'darwin'", "pytorch-lightning==1.6.0; sys_platform != 'darwin'", diff --git a/examples/quickstart-pytorch/README.md b/examples/quickstart-pytorch/README.md index e37d49194b01..d07f83a7ea85 100644 --- a/examples/quickstart-pytorch/README.md +++ b/examples/quickstart-pytorch/README.md @@ -55,7 +55,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,learning-rate=0.05 +flwr run . --run-config "num-server-rounds=5 learning-rate=0.05" ``` > \[!TIP\] diff --git a/examples/quickstart-pytorch/pyproject.toml b/examples/quickstart-pytorch/pyproject.toml index 29414962ba6b..98f02626a429 100644 --- a/examples/quickstart-pytorch/pyproject.toml +++ b/examples/quickstart-pytorch/pyproject.toml @@ -8,7 +8,7 @@ version = "1.0.0" description = "Federated Learning with PyTorch and Flower (Quickstart Example)" license = "Apache-2.0" dependencies = [ - "flwr[simulation]>=1.10.0", + "flwr[simulation]>=1.11.0", "flwr-datasets[vision]>=0.3.0", "torch==2.2.1", "torchvision==0.17.1", diff --git a/examples/quickstart-tensorflow/README.md b/examples/quickstart-tensorflow/README.md index f1fa12a3393c..a162e756d799 100644 --- a/examples/quickstart-tensorflow/README.md +++ b/examples/quickstart-tensorflow/README.md @@ -56,7 +56,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,learning-rate=0.05 +flwr run . 
--run-config "num-server-rounds=5 learning-rate=0.05" ``` > \[!TIP\] diff --git a/examples/sklearn-logreg-mnist/README.md b/examples/sklearn-logreg-mnist/README.md index b56dbfc5dd3a..7c75e2ecfb85 100644 --- a/examples/sklearn-logreg-mnist/README.md +++ b/examples/sklearn-logreg-mnist/README.md @@ -55,7 +55,7 @@ flwr run . You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: ```bash -flwr run . --run-config num-server-rounds=5,fraction-fit=0.25 +flwr run . --run-config "num-server-rounds=5 fraction-fit=0.25" ``` > \[!TIP\] diff --git a/examples/sklearn-logreg-mnist/pyproject.toml b/examples/sklearn-logreg-mnist/pyproject.toml index be1e4810b312..937f05e35eda 100644 --- a/examples/sklearn-logreg-mnist/pyproject.toml +++ b/examples/sklearn-logreg-mnist/pyproject.toml @@ -12,7 +12,7 @@ authors = [ { name = "Kaushik Amar Das", email = "kaushik.das@iiitg.ac.in" }, ] dependencies = [ - "flwr[simulation]>=1.10.0", + "flwr[simulation]>=1.11.0", "flwr-datasets[vision]>=0.3.0", "numpy<2.0.0", "scikit-learn~=1.2.2", diff --git a/examples/xgboost-quickstart/README.md b/examples/xgboost-quickstart/README.md index fa3e9d0dc6fb..a7b047c090f0 100644 --- a/examples/xgboost-quickstart/README.md +++ b/examples/xgboost-quickstart/README.md @@ -4,7 +4,7 @@ dataset: [HIGGS] framework: [xgboost] --- -# Flower Example using XGBoost +# Federated Learning with XGBoost and Flower (Quickstart Example) This example demonstrates how to perform EXtreme Gradient Boosting (XGBoost) within Flower using `xgboost` package. We use [HIGGS](https://archive.ics.uci.edu/dataset/280/higgs) dataset for this example to perform a binary classification task. @@ -12,72 +12,60 @@ Tree-based with bagging method is used for aggregation on the server. This project provides a minimal code example to enable you to get started quickly. For a more comprehensive code example, take a look at [xgboost-comprehensive](https://github.com/adap/flower/tree/main/examples/xgboost-comprehensive). -## Project Setup +## Set up the project -Start by cloning the example project. We prepared a single-line command that you can copy into your shell which will checkout the example for you: +### Clone the project -```shell -git clone --depth=1 https://github.com/adap/flower.git && mv flower/examples/xgboost-quickstart . && rm -rf flower && cd xgboost-quickstart -``` - -This will create a new directory called `xgboost-quickstart` containing the following files: - -``` --- README.md <- Your're reading this right now --- server.py <- Defines the server-side logic --- client.py <- Defines the client-side logic --- run.sh <- Commands to run experiments --- pyproject.toml <- Example dependencies -``` - -### Installing Dependencies - -Project dependencies (such as `xgboost` and `flwr`) are defined in `pyproject.toml`. You can install the dependencies by invoking `pip`: +Start by cloning the example project: ```shell -# From a new python environment, run: -pip install . +git clone --depth=1 https://github.com/adap/flower.git _tmp \ + && mv _tmp/examples/xgboost-quickstart . 
\ + && rm -rf _tmp \ + && cd xgboost-quickstart ``` -Then, to verify that everything works correctly you can run the following command: +This will create a new directory called `xgboost-quickstart` with the following structure: ```shell -python3 -c "import flwr" +xgboost-quickstart ├── xgboost_quickstart │ ├── __init__.py │ ├── client_app.py # Defines your ClientApp │ ├── server_app.py # Defines your ServerApp │ └── task.py # Defines your utilities and data loading ├── pyproject.toml # Project metadata like dependencies and configs └── README.md ``` -If you don't see any errors you're good to go! +### Install dependencies and project -## Run Federated Learning with XGBoost and Flower +Install the dependencies defined in `pyproject.toml` as well as the `xgboost_quickstart` package. -Afterwards you are ready to start the Flower server as well as the clients. -You can simply start the server in a terminal as follows: - -```shell -python3 server.py +```bash +pip install -e . ``` -Now you are ready to start the Flower clients which will participate in the learning. -To do so simply open two more terminal windows and run the following commands. +## Run the project -Start client 1 in the first terminal: +You can run your Flower project in both _simulation_ and _deployment_ mode without making changes to the code. If you are starting with Flower, we recommend using the _simulation_ mode as it requires fewer components to be launched manually. By default, `flwr run` will make use of the Simulation Engine. -```shell -python3 client.py --partition-id=0 +### Run with the Simulation Engine + +```bash +flwr run . ``` -Start client 2 in the second terminal: +You can also override some of the settings for your `ClientApp` and `ServerApp` defined in `pyproject.toml`. For example: -```shell -python3 client.py --partition-id=1 +```bash +flwr run . --run-config "num-server-rounds=5 params.eta=0.05" ``` -You will see that XGBoost is starting a federated training. - -Alternatively, you can use `run.sh` to run the same experiment in a single terminal as follows: +> \[!TIP\] +> For a more detailed walk-through, check our [quickstart XGBoost tutorial](https://flower.ai/docs/framework/tutorial-quickstart-xgboost.html) -```shell -poetry run ./run.sh -``` +### Run with the Deployment Engine -Look at the [code](https://github.com/adap/flower/tree/main/examples/xgboost-quickstart) -and [tutorial](https://flower.ai/docs/framework/tutorial-quickstart-xgboost.html) for a detailed explanation. +> \[!NOTE\] +> An update to this example will show how to run this Flower application with the Deployment Engine and TLS certificates, or with Docker. diff --git a/examples/xgboost-quickstart/client.py b/examples/xgboost-quickstart/client.py deleted file mode 100644 index d505a7ede785..000000000000 --- a/examples/xgboost-quickstart/client.py +++ /dev/null @@ -1,207 +0,0 @@ -import argparse -import warnings -from logging import INFO -from typing import Union - -import flwr as fl -import xgboost as xgb -from datasets import Dataset, DatasetDict -from flwr.common import ( - Code, - EvaluateIns, - EvaluateRes, - FitIns, - FitRes, - GetParametersIns, - GetParametersRes, - Parameters, - Status, -) -from flwr.common.logger import log -from flwr_datasets import FederatedDataset -from flwr_datasets.partitioner import IidPartitioner - -warnings.filterwarnings("ignore", category=UserWarning) - -# Define arguments parser for the client/partition ID. 
-parser = argparse.ArgumentParser() -parser.add_argument( - "--partition-id", - default=0, - type=int, - help="Partition ID used for the current client.", -) -args = parser.parse_args() - - -# Define data partitioning related functions -def train_test_split(partition: Dataset, test_fraction: float, seed: int): - """Split the data into train and validation set given split rate.""" - train_test = partition.train_test_split(test_size=test_fraction, seed=seed) - partition_train = train_test["train"] - partition_test = train_test["test"] - - num_train = len(partition_train) - num_test = len(partition_test) - - return partition_train, partition_test, num_train, num_test - - -def transform_dataset_to_dmatrix(data: Union[Dataset, DatasetDict]) -> xgb.core.DMatrix: - """Transform dataset to DMatrix format for xgboost.""" - x = data["inputs"] - y = data["label"] - new_data = xgb.DMatrix(x, label=y) - return new_data - - -# Load (HIGGS) dataset and conduct partitioning -# We use a small subset (num_partitions=30) of the dataset for demonstration to speed up the data loading process. -partitioner = IidPartitioner(num_partitions=30) -fds = FederatedDataset(dataset="jxie/higgs", partitioners={"train": partitioner}) - -# Load the partition for this `partition_id` -log(INFO, "Loading partition...") -partition = fds.load_partition(partition_id=args.partition_id, split="train") -partition.set_format("numpy") - -# Train/test splitting -train_data, valid_data, num_train, num_val = train_test_split( - partition, test_fraction=0.2, seed=42 -) - -# Reformat data to DMatrix for xgboost -log(INFO, "Reformatting data...") -train_dmatrix = transform_dataset_to_dmatrix(train_data) -valid_dmatrix = transform_dataset_to_dmatrix(valid_data) - -# Hyper-parameters for xgboost training -num_local_round = 1 -params = { - "objective": "binary:logistic", - "eta": 0.1, # Learning rate - "max_depth": 8, - "eval_metric": "auc", - "nthread": 16, - "num_parallel_tree": 1, - "subsample": 1, - "tree_method": "hist", -} - - -# Define Flower client -class XgbClient(fl.client.Client): - def __init__( - self, - train_dmatrix, - valid_dmatrix, - num_train, - num_val, - num_local_round, - params, - ): - self.train_dmatrix = train_dmatrix - self.valid_dmatrix = valid_dmatrix - self.num_train = num_train - self.num_val = num_val - self.num_local_round = num_local_round - self.params = params - - def get_parameters(self, ins: GetParametersIns) -> GetParametersRes: - _ = (self, ins) - return GetParametersRes( - status=Status( - code=Code.OK, - message="OK", - ), - parameters=Parameters(tensor_type="", tensors=[]), - ) - - def _local_boost(self, bst_input): - # Update trees based on local training data. 
- for i in range(self.num_local_round): - bst_input.update(self.train_dmatrix, bst_input.num_boosted_rounds()) - - # Bagging: extract the last N=num_local_round trees for sever aggregation - bst = bst_input[ - bst_input.num_boosted_rounds() - - self.num_local_round : bst_input.num_boosted_rounds() - ] - - return bst - - def fit(self, ins: FitIns) -> FitRes: - global_round = int(ins.config["global_round"]) - if global_round == 1: - # First round local training - bst = xgb.train( - self.params, - self.train_dmatrix, - num_boost_round=self.num_local_round, - evals=[(self.valid_dmatrix, "validate"), (self.train_dmatrix, "train")], - ) - else: - bst = xgb.Booster(params=self.params) - for item in ins.parameters.tensors: - global_model = bytearray(item) - - # Load global model into booster - bst.load_model(global_model) - - # Local training - bst = self._local_boost(bst) - - # Save model - local_model = bst.save_raw("json") - local_model_bytes = bytes(local_model) - - return FitRes( - status=Status( - code=Code.OK, - message="OK", - ), - parameters=Parameters(tensor_type="", tensors=[local_model_bytes]), - num_examples=self.num_train, - metrics={}, - ) - - def evaluate(self, ins: EvaluateIns) -> EvaluateRes: - # Load global model - bst = xgb.Booster(params=self.params) - for para in ins.parameters.tensors: - para_b = bytearray(para) - bst.load_model(para_b) - - # Run evaluation - eval_results = bst.eval_set( - evals=[(self.valid_dmatrix, "valid")], - iteration=bst.num_boosted_rounds() - 1, - ) - auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) - - global_round = ins.config["global_round"] - log(INFO, f"AUC = {auc} at round {global_round}") - - return EvaluateRes( - status=Status( - code=Code.OK, - message="OK", - ), - loss=0.0, - num_examples=self.num_val, - metrics={"AUC": auc}, - ) - - -# Start Flower client -fl.client.start_client( - server_address="127.0.0.1:8080", - client=XgbClient( - train_dmatrix, - valid_dmatrix, - num_train, - num_val, - num_local_round, - params, - ).to_client(), -) diff --git a/examples/xgboost-quickstart/pyproject.toml b/examples/xgboost-quickstart/pyproject.toml index f1e451fe779a..da3561bfded4 100644 --- a/examples/xgboost-quickstart/pyproject.toml +++ b/examples/xgboost-quickstart/pyproject.toml @@ -3,17 +3,45 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "quickstart-xgboost" -version = "0.1.0" -description = "XGBoost Federated Learning Quickstart with Flower" -authors = [ - { name = "The Flower Authors", email = "hello@flower.ai" }, -] +name = "xgboost_quickstart" +version = "1.0.0" +description = "Federated Learning with XGBoost and Flower (Quickstart Example)" +license = "Apache-2.0" dependencies = [ - "flwr>=1.8.0,<2.0", - "flwr-datasets>=0.1.0,<1.0.0", - "xgboost>=2.0.0,<3.0.0", + "flwr-nightly[simulation]==1.11.0.dev20240826", + "flwr-datasets>=0.3.0", + "xgboost>=2.0.0", ] [tool.hatch.build.targets.wheel] packages = ["."] + +[tool.flwr.app] +publisher = "flwrlabs" + +[tool.flwr.app.components] +serverapp = "xgboost_quickstart.server_app:app" +clientapp = "xgboost_quickstart.client_app:app" + +[tool.flwr.app.config] +# ServerApp +num-server-rounds = 3 +fraction-fit = 0.1 +fraction-evaluate = 0.1 + +# ClientApp +local-epochs = 1 +params.objective = "binary:logistic" +params.eta = 0.1 # Learning rate +params.max-depth = 8 +params.eval-metric = "auc" +params.nthread = 16 +params.num-parallel-tree = 1 +params.subsample = 1 +params.tree-method = "hist" + +[tool.flwr.federations] +default = "local-simulation" + 
+[tool.flwr.federations.local-simulation] +options.num-supernodes = 20 diff --git a/examples/xgboost-quickstart/run.sh b/examples/xgboost-quickstart/run.sh deleted file mode 100755 index b35af58222ab..000000000000 --- a/examples/xgboost-quickstart/run.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -set -e -cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"/ - -echo "Starting server" -python server.py & -sleep 5 # Sleep for 5s to give the server enough time to start - -for i in `seq 0 1`; do - echo "Starting client $i" - python3 client.py --partition-id=$i & -done - -# Enable CTRL+C to stop all background processes -trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM -# Wait for all background processes to complete -wait diff --git a/examples/xgboost-quickstart/server.py b/examples/xgboost-quickstart/server.py deleted file mode 100644 index 2246d32686a4..000000000000 --- a/examples/xgboost-quickstart/server.py +++ /dev/null @@ -1,48 +0,0 @@ -from typing import Dict - -import flwr as fl -from flwr.server.strategy import FedXgbBagging - -# FL experimental settings -pool_size = 2 -num_rounds = 5 -num_clients_per_round = 2 -num_evaluate_clients = 2 - - -def evaluate_metrics_aggregation(eval_metrics): - """Return an aggregated metric (AUC) for evaluation.""" - total_num = sum([num for num, _ in eval_metrics]) - auc_aggregated = ( - sum([metrics["AUC"] * num for num, metrics in eval_metrics]) / total_num - ) - metrics_aggregated = {"AUC": auc_aggregated} - return metrics_aggregated - - -def config_func(rnd: int) -> Dict[str, str]: - """Return a configuration with global epochs.""" - config = { - "global_round": str(rnd), - } - return config - - -# Define strategy -strategy = FedXgbBagging( - fraction_fit=(float(num_clients_per_round) / pool_size), - min_fit_clients=num_clients_per_round, - min_available_clients=pool_size, - min_evaluate_clients=num_evaluate_clients, - fraction_evaluate=1.0, - evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation, - on_evaluate_config_fn=config_func, - on_fit_config_fn=config_func, -) - -# Start Flower server -fl.server.start_server( - server_address="0.0.0.0:8080", - config=fl.server.ServerConfig(num_rounds=num_rounds), - strategy=strategy, -) diff --git a/examples/xgboost-quickstart/xgboost_quickstart/__init__.py b/examples/xgboost-quickstart/xgboost_quickstart/__init__.py new file mode 100644 index 000000000000..470360b377a6 --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/__init__.py @@ -0,0 +1 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" diff --git a/examples/xgboost-quickstart/xgboost_quickstart/client_app.py b/examples/xgboost-quickstart/xgboost_quickstart/client_app.py new file mode 100644 index 000000000000..3aa199a10274 --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/client_app.py @@ -0,0 +1,139 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" + +import warnings + +from flwr.common.context import Context + +import xgboost as xgb +from flwr.client import Client, ClientApp +from flwr.common.config import unflatten_dict +from flwr.common import ( + Code, + EvaluateIns, + EvaluateRes, + FitIns, + FitRes, + Parameters, + Status, +) + +from xgboost_quickstart.task import load_data, replace_keys + +warnings.filterwarnings("ignore", category=UserWarning) + + +# Define Flower Client and client_fn +class FlowerClient(Client): + def __init__( + self, + train_dmatrix, + valid_dmatrix, + num_train, + num_val, + num_local_round, + params, + ): + self.train_dmatrix = train_dmatrix + 
self.valid_dmatrix = valid_dmatrix + self.num_train = num_train + self.num_val = num_val + self.num_local_round = num_local_round + self.params = params + + def _local_boost(self, bst_input): + # Update trees based on local training data. + for i in range(self.num_local_round): + bst_input.update(self.train_dmatrix, bst_input.num_boosted_rounds()) + + # Bagging: extract the last N=num_local_round trees for server aggregation + bst = bst_input[ + bst_input.num_boosted_rounds() + - self.num_local_round : bst_input.num_boosted_rounds() + ] + + return bst + + def fit(self, ins: FitIns) -> FitRes: + global_round = int(ins.config["global_round"]) + if global_round == 1: + # First round local training + bst = xgb.train( + self.params, + self.train_dmatrix, + num_boost_round=self.num_local_round, + evals=[(self.valid_dmatrix, "validate"), (self.train_dmatrix, "train")], + ) + else: + bst = xgb.Booster(params=self.params) + global_model = bytearray(ins.parameters.tensors[0]) + + # Load global model into booster + bst.load_model(global_model) + + # Local training + bst = self._local_boost(bst) + + # Save model + local_model = bst.save_raw("json") + local_model_bytes = bytes(local_model) + + return FitRes( + status=Status( + code=Code.OK, + message="OK", + ), + parameters=Parameters(tensor_type="", tensors=[local_model_bytes]), + num_examples=self.num_train, + metrics={}, + ) + + def evaluate(self, ins: EvaluateIns) -> EvaluateRes: + # Load global model + bst = xgb.Booster(params=self.params) + para_b = bytearray(ins.parameters.tensors[0]) + bst.load_model(para_b) + + # Run evaluation + eval_results = bst.eval_set( + evals=[(self.valid_dmatrix, "valid")], + iteration=bst.num_boosted_rounds() - 1, + ) + auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4) + + return EvaluateRes( + status=Status( + code=Code.OK, + message="OK", + ), + loss=0.0, + num_examples=self.num_val, + metrics={"AUC": auc}, + ) + + +def client_fn(context: Context): + # Load model and data + partition_id = context.node_config["partition-id"] + num_partitions = context.node_config["num-partitions"] + train_dmatrix, valid_dmatrix, num_train, num_val = load_data( + partition_id, num_partitions + ) + + cfg = replace_keys(unflatten_dict(context.run_config)) + num_local_round = cfg["local_epochs"] + + # Return Client instance + return FlowerClient( + train_dmatrix, + valid_dmatrix, + num_train, + num_val, + num_local_round, + cfg["params"], + ) + + +# Flower ClientApp +app = ClientApp( + client_fn, +) diff --git a/examples/xgboost-quickstart/xgboost_quickstart/server_app.py b/examples/xgboost-quickstart/xgboost_quickstart/server_app.py new file mode 100644 index 000000000000..6b81c6caa785 --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/server_app.py @@ -0,0 +1,54 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" + +from typing import Dict + +from flwr.common import Context, Parameters +from flwr.server import ServerApp, ServerAppComponents, ServerConfig +from flwr.server.strategy import FedXgbBagging + + +def evaluate_metrics_aggregation(eval_metrics): + """Return an aggregated metric (AUC) for evaluation.""" + total_num = sum([num for num, _ in eval_metrics]) + auc_aggregated = ( + sum([metrics["AUC"] * num for num, metrics in eval_metrics]) / total_num + ) + metrics_aggregated = {"AUC": auc_aggregated} + return metrics_aggregated + + +def config_func(rnd: int) -> Dict[str, str]: + """Return a configuration with global epochs.""" + config = { + "global_round": str(rnd), + } + return config + + +def 
server_fn(context: Context): + # Read from config + num_rounds = context.run_config["num-server-rounds"] + fraction_fit = context.run_config["fraction-fit"] + fraction_evaluate = context.run_config["fraction-evaluate"] + + # Init an empty Parameter + parameters = Parameters(tensor_type="", tensors=[]) + + # Define strategy + strategy = FedXgbBagging( + fraction_fit=fraction_fit, + fraction_evaluate=fraction_evaluate, + evaluate_metrics_aggregation_fn=evaluate_metrics_aggregation, + on_evaluate_config_fn=config_func, + on_fit_config_fn=config_func, + initial_parameters=parameters, + ) + config = ServerConfig(num_rounds=num_rounds) + + return ServerAppComponents(strategy=strategy, config=config) + + +# Create ServerApp +app = ServerApp( + server_fn=server_fn, +) diff --git a/examples/xgboost-quickstart/xgboost_quickstart/task.py b/examples/xgboost-quickstart/xgboost_quickstart/task.py new file mode 100644 index 000000000000..09916d9ac04a --- /dev/null +++ b/examples/xgboost-quickstart/xgboost_quickstart/task.py @@ -0,0 +1,71 @@ +"""xgboost_quickstart: A Flower / XGBoost app.""" + +from logging import INFO + +import xgboost as xgb +from flwr.common import log +from flwr_datasets import FederatedDataset +from flwr_datasets.partitioner import IidPartitioner + + +def train_test_split(partition, test_fraction, seed): + """Split the data into train and validation set given split rate.""" + train_test = partition.train_test_split(test_size=test_fraction, seed=seed) + partition_train = train_test["train"] + partition_test = train_test["test"] + + num_train = len(partition_train) + num_test = len(partition_test) + + return partition_train, partition_test, num_train, num_test + + +def transform_dataset_to_dmatrix(data): + """Transform dataset to DMatrix format for xgboost.""" + x = data["inputs"] + y = data["label"] + new_data = xgb.DMatrix(x, label=y) + return new_data + + +fds = None # Cache FederatedDataset + + +def load_data(partition_id, num_clients): + """Load partition HIGGS data.""" + # Only initialize `FederatedDataset` once + global fds + if fds is None: + partitioner = IidPartitioner(num_partitions=num_clients) + fds = FederatedDataset( + dataset="jxie/higgs", + partitioners={"train": partitioner}, + ) + + # Load the partition for this `partition_id` + partition = fds.load_partition(partition_id, split="train") + partition.set_format("numpy") + + # Train/test splitting + train_data, valid_data, num_train, num_val = train_test_split( + partition, test_fraction=0.2, seed=42 + ) + + # Reformat data to DMatrix for xgboost + log(INFO, "Reformatting data...") + train_dmatrix = transform_dataset_to_dmatrix(train_data) + valid_dmatrix = transform_dataset_to_dmatrix(valid_data) + + return train_dmatrix, valid_dmatrix, num_train, num_val + + +def replace_keys(input_dict, match="-", target="_"): + """Recursively replace match string with target string in dictionary keys.""" + new_dict = {} + for key, value in input_dict.items(): + new_key = key.replace(match, target) + if isinstance(value, dict): + new_dict[new_key] = replace_keys(value, match, target) + else: + new_dict[new_key] = value + return new_dict diff --git a/pyproject.toml b/pyproject.toml index 0d0138a5689b..6df9180ac3f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "flwr" -version = "1.11.0" +version = "1.12.0" description = "Flower: A Friendly Federated Learning Framework" license = "Apache-2.0" authors = ["The Flower Authors "] @@ -52,14 +52,18 @@ exclude = 
[ ] [tool.poetry.scripts] +# `flwr` CLI flwr = "flwr.cli.app:app" -flower-superlink = "flwr.server:run_superlink" -flower-superexec = "flwr.superexec:run_superexec" -flower-supernode = "flwr.client:run_supernode" -flower-client-app = "flwr.client:run_client_app" -flower-server-app = "flwr.server:run_server_app" +# SuperExec (can run with either Deployment Engine or Simulation Engine) +flower-superexec = "flwr.superexec.app:run_superexec" +# Simulation Engine flower-simulation = "flwr.simulation.run_simulation:run_simulation_from_cli" +# Deployment Engine +flower-superlink = "flwr.server.app:run_superlink" +flower-supernode = "flwr.client.supernode.app:run_supernode" +flower-server-app = "flwr.server.run_serverapp:run_server_app" flwr-clientapp = "flwr.client.clientapp:flwr_clientapp" +flower-client-app = "flwr.client.supernode:run_client_app" # Deprecated [tool.poetry.dependencies] python = "^3.8" diff --git a/src/docker/base/README.md b/src/docker/base/README.md new file mode 100644 index 000000000000..16822c18782e --- /dev/null +++ b/src/docker/base/README.md @@ -0,0 +1,43 @@ +# Flower Base + +
+ [Flower Website](https://flower.ai/)
+ +## Quick reference + +- **Learn more:**
+ [Flower Docs](https://flower.ai/docs/framework/how-to-run-flower-using-docker.html) + +- **Where to get help:**
+ [Flower Discuss](https://discuss.flower.ai), [Slack](https://flower.ai/join-slack) or [GitHub](https://github.com/adap/flower) + +- **Supported architectures:**
+ `amd64`, `arm64v8` + +## Supported tags + +- `nightly`, `.dev` e.g. `1.12.0.dev20240830` + - nightly image uses Python 3.11 and Ubuntu 22.04 +- `1.11.0-py3.11-alpine3.19` +- `1.11.0-py3.11-ubuntu22.04` +- `1.11.0-py3.10-ubuntu22.04` +- `1.11.0-py3.9-ubuntu22.04` +- `1.11.0-py3.8-ubuntu22.04` +- `1.10.0-py3.11-alpine3.19` +- `1.10.0-py3.11-ubuntu22.04` +- `1.10.0-py3.10-ubuntu22.04` +- `1.10.0-py3.9-ubuntu22.04` +- `1.10.0-py3.8-ubuntu22.04` +- `1.9.0-py3.11-alpine3.19` +- `1.9.0-py3.11-ubuntu22.04` +- `1.9.0-py3.10-ubuntu22.04` +- `1.9.0-py3.9-ubuntu22.04` +- `1.9.0-py3.8-ubuntu22.04` +- `1.8.0-py3.11-alpine3.19` +- `1.8.0-py3.11-ubuntu22.04` +- `1.8.0-py3.10-ubuntu22.04` +- `1.8.0-py3.9-ubuntu22.04` +- `1.8.0-py3.8-ubuntu22.04` diff --git a/src/docker/base/alpine/Dockerfile b/src/docker/base/alpine/Dockerfile index 441e0fdd9b85..ee1e11b2d070 100644 --- a/src/docker/base/alpine/Dockerfile +++ b/src/docker/base/alpine/Dockerfile @@ -33,6 +33,8 @@ RUN apk add --no-cache \ # require for compiling grpcio on ARM64 g++ \ libffi-dev \ + # required for installing flwr via git + git \ # create virtual env && python -m venv /python/venv @@ -42,18 +44,27 @@ ENV PATH=/python/venv/bin:$PATH # Install specific version of pip, setuptools and flwr ARG PIP_VERSION ARG SETUPTOOLS_VERSION -ARG FLWR_VERSION -ARG FLWR_PACKAGE=flwr RUN pip install -U --no-cache-dir \ pip==${PIP_VERSION} \ - setuptools==${SETUPTOOLS_VERSION} \ - ${FLWR_PACKAGE}==${FLWR_VERSION} + setuptools==${SETUPTOOLS_VERSION} + +ARG FLWR_VERSION +ARG FLWR_VERSION_REF +ARG FLWR_PACKAGE=flwr +# hadolint ignore=DL3013 +RUN if [ -z "${FLWR_VERSION_REF}" ]; then \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}==${FLWR_VERSION}; \ + else \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}@${FLWR_VERSION_REF}; \ + fi FROM python:${PYTHON_VERSION}-${DISTRO}${DISTRO_VERSION} AS base -# Upgrade system Python pip and setuptools -# hadolint ignore=DL3013 -RUN pip install -U --no-cache-dir pip setuptools +# Keep the version of system Python pip and setuptools in sync with those installed in the +# virtualenv. +ARG PIP_VERSION +ARG SETUPTOOLS_VERSION +RUN pip install -U --no-cache-dir pip==${PIP_VERSION} setuptools==${SETUPTOOLS_VERSION} # required by the grpc package RUN apk add --no-cache \ diff --git a/src/docker/base/ubuntu/Dockerfile b/src/docker/base/ubuntu/Dockerfile index 31cc8381b7c5..47655b1a52a1 100644 --- a/src/docker/base/ubuntu/Dockerfile +++ b/src/docker/base/ubuntu/Dockerfile @@ -32,7 +32,7 @@ RUN apt-get update \ # Install PyEnv and Python ARG PYTHON_VERSION=3.11 ENV PYENV_ROOT=/root/.pyenv -ENV PATH $PYENV_ROOT/bin:$PATH +ENV PATH=$PYENV_ROOT/bin:$PATH # https://github.com/hadolint/hadolint/wiki/DL4006 SHELL ["/bin/bash", "-o", "pipefail", "-c"] RUN curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash @@ -50,22 +50,29 @@ RUN LATEST=$(pyenv latest -k ${PYTHON_VERSION}) \ ENV PATH=/usr/local/bin/python/bin:$PATH -# Upgrade system Python pip and setuptools -# hadolint ignore=DL3013 -RUN pip install -U --no-cache-dir pip setuptools \ +ARG PIP_VERSION +ARG SETUPTOOLS_VERSION +# Keep the version of system Python pip and setuptools in sync with those installed in the +# virtualenv. 
+RUN pip install -U --no-cache-dir pip==${PIP_VERSION} setuptools==${SETUPTOOLS_VERSION} \ # Use a virtual environment to ensure that Python packages are installed in the same location # regardless of whether the subsequent image build is run with the app or the root user && python -m venv /python/venv ENV PATH=/python/venv/bin:$PATH -ARG PIP_VERSION -ARG SETUPTOOLS_VERSION -ARG FLWR_VERSION -ARG FLWR_PACKAGE=flwr RUN pip install -U --no-cache-dir \ pip==${PIP_VERSION} \ - setuptools==${SETUPTOOLS_VERSION} \ - ${FLWR_PACKAGE}==${FLWR_VERSION} + setuptools==${SETUPTOOLS_VERSION} + +ARG FLWR_VERSION +ARG FLWR_VERSION_REF +ARG FLWR_PACKAGE=flwr +# hadolint ignore=DL3013 +RUN if [ -z "${FLWR_VERSION_REF}" ]; then \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}==${FLWR_VERSION}; \ + else \ + pip install -U --no-cache-dir ${FLWR_PACKAGE}@${FLWR_VERSION_REF}; \ + fi FROM $DISTRO:$DISTRO_VERSION AS base diff --git a/src/docker/clientapp/README.md b/src/docker/clientapp/README.md new file mode 100644 index 000000000000..5827cb8974df --- /dev/null +++ b/src/docker/clientapp/README.md @@ -0,0 +1,26 @@ +# Flower ClientApp + +
+ [Flower Website](https://flower.ai/)
+ +## Quick reference + +- **Learn more:**
+ [Flower Docs](https://flower.ai/docs/framework/how-to-run-flower-using-docker.html) + +- **Where to get help:**
+ [Flower Discuss](https://discuss.flower.ai), [Slack](https://flower.ai/join-slack) or [GitHub](https://github.com/adap/flower) + +- **Supported architectures:**
+ `amd64`, `arm64v8` + +## Supported tags + +- `nightly`, `.dev` e.g. `1.12.0.dev20240830` +- `1.11.0`, `1.11.0-py3.11-ubuntu22.04` +- `1.11.0-py3.10-ubuntu22.04` +- `1.11.0-py3.9-ubuntu22.04` +- `1.11.0-py3.8-ubuntu22.04` diff --git a/src/docker/complete/compose.yml b/src/docker/complete/compose.yml index 90261249f322..60279adceb37 100644 --- a/src/docker/complete/compose.yml +++ b/src/docker/complete/compose.yml @@ -1,17 +1,16 @@ services: # create a SuperLink service superlink: - image: flwr/superlink:${FLWR_VERSION:-1.10.0} + image: flwr/superlink:${FLWR_VERSION:-1.11.0} command: - --insecure # create a SuperExec service superexec: - user: root build: context: ${PROJECT_DIR:-.} dockerfile_inline: | - FROM flwr/superexec:${FLWR_VERSION:-1.10.0} + FROM flwr/superexec:${FLWR_VERSION:-1.11.0} WORKDIR /app COPY --chown=app:app pyproject.toml . @@ -29,89 +28,122 @@ services: - superlink="superlink:9091" depends_on: - superlink - volumes: - - apps-volume:/app/.flwr/apps/:rw # create a two SuperNode service with different node configs supernode-1: - user: root - deploy: - resources: - limits: - cpus: "2" + image: flwr/supernode:${FLWR_VERSION:-1.11.0} command: + - --insecure - --superlink - superlink:9092 + - --supernode-address + - 0.0.0.0:9094 + - --isolation + - process + - --node-config + - "partition-id=0 num-partitions=2" + depends_on: + - superlink + + supernode-2: + image: flwr/supernode:${FLWR_VERSION:-1.11.0} + command: - --insecure + - --superlink + - superlink:9092 + - --supernode-address + - 0.0.0.0:9095 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" depends_on: - superlink - volumes: - - apps-volume:/app/.flwr/apps/:ro + + # uncomment to add another SuperNode + # + # supernode-3: + # image: flwr/supernode:${FLWR_VERSION:-1.11.0} + # command: + # - --insecure + # - --superlink + # - superlink:9092 + # - --supernode-address + # - 0.0.0.0:9096 + # - --isolation + # - process + # - --node-config + # - "partition-id=1 num-partitions=2" + # depends_on: + # - superlink + + clientapp-1: build: context: ${PROJECT_DIR:-.} dockerfile_inline: | - FROM flwr/supernode:${FLWR_VERSION:-1.10.0} + FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} WORKDIR /app COPY --chown=app:app pyproject.toml . RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ && python -m pip install -U --no-cache-dir . - ENTRYPOINT ["flower-supernode", "--node-config", "partition-id=0,num-partitions=2"] - - supernode-2: - user: root + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-1:9094 deploy: resources: limits: cpus: "2" - command: - - --superlink - - superlink:9092 - - --insecure + stop_signal: SIGINT depends_on: - - superlink - volumes: - - apps-volume:/app/.flwr/apps/:ro + - supernode-1 + + clientapp-2: build: context: ${PROJECT_DIR:-.} dockerfile_inline: | - FROM flwr/supernode:${FLWR_VERSION:-1.10.0} + FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} WORKDIR /app COPY --chown=app:app pyproject.toml . RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ && python -m pip install -U --no-cache-dir . 
- ENTRYPOINT ["flower-supernode", "--node-config", "partition-id=1,num-partitions=2"] + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-2:9095 + deploy: + resources: + limits: + cpus: "2" + stop_signal: SIGINT + depends_on: + - supernode-2 - # uncomment to add another supernode + # uncomment to add another ClientApp # - # supernode-3: - # user: root - # deploy: - # resources: - # limits: - # cpus: "2" - # command: - # - --superlink - # - superlink:9092 - # - --insecure - # depends_on: - # - superlink - # volumes: - # - apps-volume:/app/.flwr/apps/:ro + # clientapp-3: # build: # context: ${PROJECT_DIR:-.} # dockerfile_inline: | - # FROM flwr/supernode:${FLWR_VERSION:-1.10.0} + # FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} # WORKDIR /app # COPY --chown=app:app pyproject.toml . # RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ # && python -m pip install -U --no-cache-dir . - # ENTRYPOINT ["flower-supernode", "--node-config", "partition-id=0,num-partitions=2"] - -volumes: - apps-volume: + # ENTRYPOINT ["flwr-clientapp"] + # command: + # - --supernode + # - supernode-3:9096 + # deploy: + # resources: + # limits: + # cpus: "2" + # stop_signal: SIGINT + # depends_on: + # - supernode-3 diff --git a/src/docker/complete/with-tls.yml b/src/docker/complete/with-tls.yml index 1b8540e09b64..6cbeb2ba7397 100644 --- a/src/docker/complete/with-tls.yml +++ b/src/docker/complete/with-tls.yml @@ -17,7 +17,7 @@ services: - --executor - flwr.superexec.deployment:executor - --executor-config - - superlink="superlink:9091",root-certificates="certificates/superlink-ca.crt" + - superlink="superlink:9091" root-certificates="certificates/superlink-ca.crt" - --ssl-ca-certfile=certificates/ca.crt - --ssl-certfile=certificates/server.pem - --ssl-keyfile=certificates/server.key @@ -35,6 +35,12 @@ services: command: - --superlink - superlink:9092 + - --supernode-address + - 0.0.0.0:9094 + - --isolation + - process + - --node-config + - "partition-id=0 num-partitions=2" - --root-certificates - certificates/ca.crt secrets: @@ -45,18 +51,30 @@ services: command: - --superlink - superlink:9092 + - --supernode-address + - 0.0.0.0:9095 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" - --root-certificates - certificates/ca.crt secrets: - source: superlink-ca-certfile target: /app/certificates/ca.crt - # uncomment to enable TLS on another supernode + # uncomment to enable TLS on another SuperNode # # supernode-3: # command: # - --superlink # - superlink:9092 + # - --supernode-address + # - 0.0.0.0:9096 + # - --isolation + # - process + # - --node-config + # - "partition-id=1 num-partitions=2" # - --root-certificates # - certificates/ca.crt # secrets: diff --git a/src/docker/distributed/.gitignore b/src/docker/distributed/.gitignore new file mode 100644 index 000000000000..1a11330c6e95 --- /dev/null +++ b/src/docker/distributed/.gitignore @@ -0,0 +1,3 @@ +superexec-certificates +superlink-certificates +server/state diff --git a/src/docker/distributed/certs.yml b/src/docker/distributed/certs.yml new file mode 100644 index 000000000000..48e157582e40 --- /dev/null +++ b/src/docker/distributed/certs.yml @@ -0,0 +1,6 @@ +services: + gen-certs: + build: + args: + SUPERLINK_IP: ${SUPERLINK_IP:-127.0.0.1} + SUPEREXEC_IP: ${SUPEREXEC_IP:-127.0.0.1} diff --git a/src/docker/distributed/client/compose.yml b/src/docker/distributed/client/compose.yml new file mode 100644 index 000000000000..ef69e40cc425 --- /dev/null +++ b/src/docker/distributed/client/compose.yml @@ -0,0 
+1,128 @@ +services: + supernode-1: + image: flwr/supernode:${FLWR_VERSION:-1.11.0} + command: + - --superlink + - ${SUPERLINK_IP:-127.0.0.1}:9092 + - --supernode-address + - 0.0.0.0:9094 + - --isolation + - process + - --node-config + - "partition-id=0 num-partitions=2" + - --root-certificates + - certificates/ca.crt + secrets: + - source: superlink-ca-certfile + target: /app/certificates/ca.crt + + supernode-2: + image: flwr/supernode:${FLWR_VERSION:-1.11.0} + command: + - --superlink + - ${SUPERLINK_IP:-127.0.0.1}:9092 + - --supernode-address + - 0.0.0.0:9095 + - --isolation + - process + - --node-config + - "partition-id=1 num-partitions=2" + - --root-certificates + - certificates/ca.crt + secrets: + - source: superlink-ca-certfile + target: /app/certificates/ca.crt + + # uncomment to add another SuperNode + # + # supernode-3: + # image: flwr/supernode:${FLWR_VERSION:-1.11.0} + # command: + # - --superlink + # - ${SUPERLINK_IP:-127.0.0.1}:9092 + # - --supernode-address + # - 0.0.0.0:9096 + # - --isolation + # - process + # - --node-config + # - "partition-id=1 num-partitions=2" + # - --root-certificates + # - certificates/ca.crt + # secrets: + # - source: superlink-ca-certfile + # target: /app/certificates/ca.crt + + clientapp-1: + build: + context: ${PROJECT_DIR:-.} + dockerfile_inline: | + FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} + + WORKDIR /app + COPY --chown=app:app pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-1:9094 + deploy: + resources: + limits: + cpus: "2" + stop_signal: SIGINT + depends_on: + - supernode-1 + + clientapp-2: + build: + context: ${PROJECT_DIR:-.} + dockerfile_inline: | + FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} + + WORKDIR /app + COPY --chown=app:app pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flwr-clientapp"] + command: + - --supernode + - supernode-2:9095 + deploy: + resources: + limits: + cpus: "2" + stop_signal: SIGINT + depends_on: + - supernode-2 + + # uncomment to add another ClientApp + # + # clientapp-3: + # build: + # context: ${PROJECT_DIR:-.} + # dockerfile_inline: | + # FROM flwr/clientapp:${FLWR_VERSION:-1.11.0} + + # WORKDIR /app + # COPY --chown=app:app pyproject.toml . + # RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + # && python -m pip install -U --no-cache-dir . 
+ + # ENTRYPOINT ["flwr-clientapp"] + # command: + # - --supernode + # - supernode-3:9096 + # deploy: + # resources: + # limits: + # cpus: "2" + # stop_signal: SIGINT + # depends_on: + # - supernode-3 + +secrets: + superlink-ca-certfile: + file: ../superlink-certificates/ca.crt diff --git a/src/docker/distributed/server/compose.yml b/src/docker/distributed/server/compose.yml new file mode 100644 index 000000000000..fc6dd6f58717 --- /dev/null +++ b/src/docker/distributed/server/compose.yml @@ -0,0 +1,67 @@ +services: + superlink: + image: flwr/superlink:${FLWR_VERSION:-1.11.0} + command: + - --ssl-ca-certfile=certificates/ca.crt + - --ssl-certfile=certificates/server.pem + - --ssl-keyfile=certificates/server.key + - --database=state/state.db + volumes: + - ./state/:/app/state/:rw + secrets: + - source: superlink-ca-certfile + target: /app/certificates/ca.crt + - source: superlink-certfile + target: /app/certificates/server.pem + - source: superlink-keyfile + target: /app/certificates/server.key + ports: + - 9092:9092 + + superexec: + build: + context: ${PROJECT_DIR:-.} + dockerfile_inline: | + FROM flwr/superexec:${FLWR_VERSION:-1.11.0} + + WORKDIR /app + COPY --chown=app:app pyproject.toml . + RUN sed -i 's/.*flwr\[simulation\].*//' pyproject.toml \ + && python -m pip install -U --no-cache-dir . + + ENTRYPOINT ["flower-superexec"] + command: + - --executor + - flwr.superexec.deployment:executor + - --executor-config + - superlink="superlink:9091" root-certificates="certificates/superlink-ca.crt" + - --ssl-ca-certfile=certificates/ca.crt + - --ssl-certfile=certificates/server.pem + - --ssl-keyfile=certificates/server.key + secrets: + - source: superlink-ca-certfile + target: /app/certificates/superlink-ca.crt + - source: superexec-ca-certfile + target: /app/certificates/ca.crt + - source: superexec-certfile + target: /app/certificates/server.pem + - source: superexec-keyfile + target: /app/certificates/server.key + ports: + - 9093:9093 + depends_on: + - superlink + +secrets: + superlink-ca-certfile: + file: ../superlink-certificates/ca.crt + superlink-certfile: + file: ../superlink-certificates/server.pem + superlink-keyfile: + file: ../superlink-certificates/server.key + superexec-ca-certfile: + file: ../superexec-certificates/ca.crt + superexec-certfile: + file: ../superexec-certificates/server.pem + superexec-keyfile: + file: ../superexec-certificates/server.key diff --git a/src/docker/serverapp/README.md b/src/docker/serverapp/README.md new file mode 100644 index 000000000000..f75704ad7bbb --- /dev/null +++ b/src/docker/serverapp/README.md @@ -0,0 +1,38 @@ +# Flower ServerApp + +
+ [Flower Website](https://flower.ai/)
+ +## Quick reference + +- **Learn more:**
+ [Flower Docs](https://flower.ai/docs/framework/how-to-run-flower-using-docker.html) + +- **Where to get help:**
+ [Flower Discuss](https://discuss.flower.ai), [Slack](https://flower.ai/join-slack) or [GitHub](https://github.com/adap/flower) + +- **Supported architectures:**
+ `amd64`, `arm64v8` + +## Supported tags + +- `nightly`, `<version>.dev<YYYYMMDD>` e.g. `1.12.0.dev20240830` +- `1.11.0`, `1.11.0-py3.11-ubuntu22.04` +- `1.11.0-py3.10-ubuntu22.04` +- `1.11.0-py3.9-ubuntu22.04` +- `1.11.0-py3.8-ubuntu22.04` +- `1.10.0`, `1.10.0-py3.11-ubuntu22.04` +- `1.10.0-py3.10-ubuntu22.04` +- `1.10.0-py3.9-ubuntu22.04` +- `1.10.0-py3.8-ubuntu22.04` +- `1.9.0`, `1.9.0-py3.11-ubuntu22.04` +- `1.9.0-py3.10-ubuntu22.04` +- `1.9.0-py3.9-ubuntu22.04` +- `1.9.0-py3.8-ubuntu22.04` +- `1.8.0`, `1.8.0-py3.11-ubuntu22.04` +- `1.8.0-py3.10-ubuntu22.04` +- `1.8.0-py3.9-ubuntu22.04` +- `1.8.0-py3.8-ubuntu22.04` diff --git a/src/docker/superexec/README.md new file mode 100644 index 000000000000..c5c102313ccb --- /dev/null +++ b/src/docker/superexec/README.md @@ -0,0 +1,30 @@ +# Flower SuperExec + 
+ [Flower Website](https://flower.ai/)
+ +## Quick reference + +- **Learn more:**
+ [Flower Docs](https://flower.ai/docs/framework/how-to-run-flower-using-docker.html) + +- **Where to get help:**
+ [Flower Discuss](https://discuss.flower.ai), [Slack](https://flower.ai/join-slack) or [GitHub](https://github.com/adap/flower) + +- **Supported architectures:**
+ `amd64`, `arm64v8` + +## Supported tags + +- `nightly`, `<version>.dev<YYYYMMDD>` e.g. `1.12.0.dev20240830` +- `1.11.0`, `1.11.0-py3.11-ubuntu22.04` +- `1.11.0-py3.10-ubuntu22.04` +- `1.11.0-py3.9-ubuntu22.04` +- `1.11.0-py3.8-ubuntu22.04` +- `1.10.0`, `1.10.0-py3.11-ubuntu22.04` +- `1.10.0-py3.10-ubuntu22.04` +- `1.10.0-py3.9-ubuntu22.04` +- `1.10.0-py3.8-ubuntu22.04` diff --git a/src/docker/superlink/README.md b/src/docker/superlink/README.md new file mode 100644 index 000000000000..729a1f7ba7fb --- /dev/null +++ b/src/docker/superlink/README.md @@ -0,0 +1,30 @@ +# Flower SuperLink + 
+ [Flower Website](https://flower.ai/)
+ +## Quick reference + +- **Learn more:**
+ [Flower Docs](https://flower.ai/docs/framework/how-to-run-flower-using-docker.html) + +- **Where to get help:**
+ [Flower Discuss](https://discuss.flower.ai), [Slack](https://flower.ai/join-slack) or [GitHub](https://github.com/adap/flower) + +- **Supported architectures:**
+ `amd64`, `arm64v8` + +## Supported tags + +- `nightly`, `<version>.dev<YYYYMMDD>` e.g. `1.12.0.dev20240830` +- `1.11.0`, `1.11.0-py3.11-alpine3.19` +- `1.11.0-py3.11-ubuntu22.04` +- `1.10.0`, `1.10.0-py3.11-alpine3.19` +- `1.10.0-py3.11-ubuntu22.04` +- `1.9.0`, `1.9.0-py3.11-alpine3.19` +- `1.9.0-py3.11-ubuntu22.04` +- `1.8.0`, `1.8.0-py3.11-alpine3.19` +- `1.8.0-py3.11-ubuntu22.04` diff --git a/src/docker/supernode/README.md b/src/docker/supernode/README.md new file mode 100644 index 000000000000..becc2323ca2d --- /dev/null +++ b/src/docker/supernode/README.md @@ -0,0 +1,35 @@ +# Flower SuperNode + 
+ [Flower Website](https://flower.ai/)
+ +## Quick reference + +- **Learn more:**
+ [Flower Docs](https://flower.ai/docs/framework/how-to-run-flower-using-docker.html) + +- **Where to get help:**
+ [Flower Discuss](https://discuss.flower.ai), [Slack](https://flower.ai/join-slack) or [GitHub](https://github.com/adap/flower) + +- **Supported architectures:**
+ `amd64`, `arm64v8` + +## Supported tags + +- `nightly`, `.dev` e.g. `1.12.0.dev20240830` +- `1.11.0`, `1.11.0-py3.11-alpine3.19` +- `1.11.0-py3.11-ubuntu22.04` +- `1.11.0-py3.10-ubuntu22.04` +- `1.11.0-py3.9-ubuntu22.04` +- `1.11.0-py3.8-ubuntu22.04` +- `1.10.0`, `1.10.0-py3.11-ubuntu22.04` +- `1.10.0-py3.10-ubuntu22.04` +- `1.10.0-py3.9-ubuntu22.04` +- `1.10.0-py3.8-ubuntu22.04` +- `1.9.0`, `1.9.0-py3.11-ubuntu22.04` +- `1.9.0-py3.10-ubuntu22.04` +- `1.9.0-py3.9-ubuntu22.04` +- `1.9.0-py3.8-ubuntu22.04` diff --git a/src/py/flwr/cli/new/new.py b/src/py/flwr/cli/new/new.py index 9f2d32ddf99c..520f683a47d8 100644 --- a/src/py/flwr/cli/new/new.py +++ b/src/py/flwr/cli/new/new.py @@ -196,7 +196,6 @@ def new( f"{import_name}/client_app.py": { "template": "app/code/flwr_tune/client_app.py.tpl" }, - f"{import_name}/app.py": {"template": "app/code/flwr_tune/app.py.tpl"}, f"{import_name}/models.py": { "template": "app/code/flwr_tune/models.py.tpl" }, diff --git a/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl b/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl index 48ee3b4f5356..f8c148691561 100644 --- a/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +++ b/src/py/flwr/cli/new/templates/app/code/client.tensorflow.py.tpl @@ -17,9 +17,6 @@ class FlowerClient(NumPyClient): self.batch_size = batch_size self.verbose = verbose - def get_parameters(self, config): - return self.model.get_weights() - def fit(self, parameters, config): self.model.set_weights(parameters) self.model.fit( diff --git a/src/py/flwr/client/__init__.py b/src/py/flwr/client/__init__.py index 218f2fe20d62..dce3be9036bb 100644 --- a/src/py/flwr/client/__init__.py +++ b/src/py/flwr/client/__init__.py @@ -20,8 +20,6 @@ from .client import Client as Client from .client_app import ClientApp as ClientApp from .numpy_client import NumPyClient as NumPyClient -from .supernode import run_client_app as run_client_app -from .supernode import run_supernode as run_supernode from .typing import ClientFn as ClientFn from .typing import ClientFnExt as ClientFnExt @@ -32,8 +30,6 @@ "ClientFnExt", "NumPyClient", "mod", - "run_client_app", - "run_supernode", "start_client", "start_numpy_client", ] diff --git a/src/py/flwr/client/client_app.py b/src/py/flwr/client/client_app.py index 2a913b3a248d..c322ba747114 100644 --- a/src/py/flwr/client/client_app.py +++ b/src/py/flwr/client/client_app.py @@ -41,11 +41,11 @@ def _alert_erroneous_client_fn() -> None: def _inspect_maybe_adapt_client_fn_signature(client_fn: ClientFnExt) -> ClientFnExt: client_fn_args = inspect.signature(client_fn).parameters - first_arg = list(client_fn_args.keys())[0] if len(client_fn_args) != 1: _alert_erroneous_client_fn() + first_arg = list(client_fn_args.keys())[0] first_arg_type = client_fn_args[first_arg].annotation if first_arg_type is str or first_arg == "cid": @@ -263,7 +263,7 @@ def _registration_error(fn_name: str) -> ValueError: >>> class FlowerClient(NumPyClient): >>> # ... 
>>> - >>> def client_fn(cid) -> Client: + >>> def client_fn(context: Context): >>> return FlowerClient().to_client() >>> >>> app = ClientApp( diff --git a/src/py/flwr/client/grpc_rere_client/client_interceptor.py b/src/py/flwr/client/grpc_rere_client/client_interceptor.py index d2dded8a73d9..8e8b701ca272 100644 --- a/src/py/flwr/client/grpc_rere_client/client_interceptor.py +++ b/src/py/flwr/client/grpc_rere_client/client_interceptor.py @@ -17,11 +17,13 @@ import base64 import collections +from logging import WARNING from typing import Any, Callable, Optional, Sequence, Tuple, Union import grpc from cryptography.hazmat.primitives.asymmetric import ec +from flwr.common.logger import log from flwr.common.secure_aggregation.crypto.symmetric_encryption import ( bytes_to_public_key, compute_hmac, @@ -128,13 +130,12 @@ def intercept_unary_unary( if self.shared_secret is None: raise RuntimeError("Failure to compute hmac") + message_bytes = request.SerializeToString(deterministic=True) metadata.append( ( _AUTH_TOKEN_HEADER, base64.urlsafe_b64encode( - compute_hmac( - self.shared_secret, request.SerializeToString(True) - ) + compute_hmac(self.shared_secret, message_bytes) ), ) ) @@ -151,8 +152,15 @@ def intercept_unary_unary( server_public_key_bytes = base64.urlsafe_b64decode( _get_value_from_tuples(_PUBLIC_KEY_HEADER, response.initial_metadata()) ) - self.server_public_key = bytes_to_public_key(server_public_key_bytes) - self.shared_secret = generate_shared_key( - self.private_key, self.server_public_key - ) + + if server_public_key_bytes != b"": + self.server_public_key = bytes_to_public_key(server_public_key_bytes) + else: + log(WARNING, "Can't get server public key, SuperLink may be offline") + + if self.server_public_key is not None: + self.shared_secret = generate_shared_key( + self.private_key, self.server_public_key + ) + return response diff --git a/src/py/flwr/client/grpc_rere_client/client_interceptor_test.py b/src/py/flwr/client/grpc_rere_client/client_interceptor_test.py index 79416a8eb31b..72ac20738ad6 100644 --- a/src/py/flwr/client/grpc_rere_client/client_interceptor_test.py +++ b/src/py/flwr/client/grpc_rere_client/client_interceptor_test.py @@ -73,7 +73,7 @@ def unary_unary( """Handle unary call.""" with self._lock: self._received_client_metadata = context.invocation_metadata() - self._received_message_bytes = request.SerializeToString(True) + self._received_message_bytes = request.SerializeToString(deterministic=True) if isinstance(request, CreateNodeRequest): context.send_initial_metadata( @@ -164,7 +164,7 @@ def _init_retry_invoker() -> RetryInvoker: return RetryInvoker( wait_gen_factory=exponential, recoverable_exceptions=grpc.RpcError, - max_tries=None, + max_tries=1, max_time=None, on_giveup=lambda retry_state: ( log( @@ -415,6 +415,27 @@ def test_client_auth_get_run(self) -> None: assert actual_public_key == expected_public_key assert actual_hmac == expected_hmac + def test_without_servicer(self) -> None: + """Test client authentication without servicer.""" + # Prepare + self._server.stop(grace=None) + retry_invoker = _init_retry_invoker() + + # Execute and Assert + with self._connection( + self._address, + True, + retry_invoker, + GRPC_MAX_MESSAGE_LENGTH, + None, + (self._client_private_key, self._client_public_key), + ) as conn: + _, _, create_node, _, _, _ = conn + assert create_node is not None + create_node() + + assert self._servicer.received_client_metadata() is None + if __name__ == "__main__": unittest.main(verbosity=2) diff --git 
diff --git a/src/py/flwr/common/record/recordset.py b/src/py/flwr/common/record/recordset.py
index f16a22695d6e..b2d1da4411bb 100644
--- a/src/py/flwr/common/record/recordset.py
+++ b/src/py/flwr/common/record/recordset.py
@@ -119,7 +119,7 @@ class RecordSet:
     Let's see an example.
 
     >>> from flwr.common import RecordSet
-    >>> from flwr.common import ConfigsRecords, MetricsRecords, ParametersRecord
+    >>> from flwr.common import ConfigsRecord, MetricsRecord, ParametersRecord
     >>>
     >>> # Let's begin with an empty record
     >>> my_recordset = RecordSet()
diff --git a/src/py/flwr/common/record/typeddict.py b/src/py/flwr/common/record/typeddict.py
index 791077d8eff2..37d98b01a306 100644
--- a/src/py/flwr/common/record/typeddict.py
+++ b/src/py/flwr/common/record/typeddict.py
@@ -15,7 +15,18 @@
 """Typed dict base class for *Records."""
 
-from typing import Callable, Dict, Generic, Iterator, MutableMapping, TypeVar, cast
+from typing import (
+    Callable,
+    Dict,
+    Generic,
+    ItemsView,
+    Iterator,
+    KeysView,
+    MutableMapping,
+    TypeVar,
+    ValuesView,
+    cast,
+)
 
 K = TypeVar("K")  # Key type
 V = TypeVar("V")  # Value type
@@ -73,3 +84,15 @@ def __eq__(self, other: object) -> bool:
         if isinstance(other, dict):
             return data == other
         return NotImplemented
+
+    def keys(self) -> KeysView[K]:
+        """D.keys() -> a set-like object providing a view on D's keys."""
+        return cast(Dict[K, V], self.__dict__["_data"]).keys()
+
+    def values(self) -> ValuesView[V]:
+        """D.values() -> an object providing a view on D's values."""
+        return cast(Dict[K, V], self.__dict__["_data"]).values()
+
+    def items(self) -> ItemsView[K, V]:
+        """D.items() -> a set-like object providing a view on D's items."""
+        return cast(Dict[K, V], self.__dict__["_data"]).items()
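With `keys()`, `values()`, and `items()` in place, the TypedDict-backed records can be consumed like plain dicts without reaching into the internal `_data` mapping. A short usage sketch, assuming `MetricsRecord` accepts an initial dict as the docstring examples above suggest:

```python
from flwr.common import MetricsRecord

# Build a record from plain metrics (constructor shape assumed).
record = MetricsRecord({"accuracy": 0.93, "loss": 0.41})

# The new views make iteration read like a standard dict.
for name, value in record.items():  # ItemsView[K, V]
    print(f"{name}: {value}")

assert set(record.keys()) == {"accuracy", "loss"}  # KeysView[K]
```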
diff --git a/src/py/flwr/common/telemetry.py b/src/py/flwr/common/telemetry.py
index 399f400b7edc..981cfe79966a 100644
--- a/src/py/flwr/common/telemetry.py
+++ b/src/py/flwr/common/telemetry.py
@@ -132,53 +132,59 @@ def _generate_next_value_(name: str, start: int, count: int, last_values: List[A
     # Ping
     PING = auto()
 
-    # Client: start_client
+    # --- LEGACY FUNCTIONS -------------------------------------------------------------
+
+    # Legacy: `start_client` function
     START_CLIENT_ENTER = auto()
     START_CLIENT_LEAVE = auto()
 
-    # Server: start_server
+    # Legacy: `start_server` function
     START_SERVER_ENTER = auto()
     START_SERVER_LEAVE = auto()
 
-    # Driver API
-    RUN_DRIVER_API_ENTER = auto()
-    RUN_DRIVER_API_LEAVE = auto()
+    # Legacy: `start_simulation` function
+    START_SIMULATION_ENTER = auto()
+    START_SIMULATION_LEAVE = auto()
 
-    # Fleet API
-    RUN_FLEET_API_ENTER = auto()
-    RUN_FLEET_API_LEAVE = auto()
+    # --- `flwr` CLI -------------------------------------------------------------------
 
-    # Driver API and Fleet API
-    RUN_SUPERLINK_ENTER = auto()
-    RUN_SUPERLINK_LEAVE = auto()
+    # Not yet implemented
 
-    # Simulation
-    START_SIMULATION_ENTER = auto()
-    START_SIMULATION_LEAVE = auto()
+    # --- SuperExec --------------------------------------------------------------------
 
-    # Driver: Driver
-    DRIVER_CONNECT = auto()
-    DRIVER_DISCONNECT = auto()
+    # SuperExec
+    RUN_SUPEREXEC_ENTER = auto()
+    RUN_SUPEREXEC_LEAVE = auto()
 
-    # Driver: start_driver
-    START_DRIVER_ENTER = auto()
-    START_DRIVER_LEAVE = auto()
+    # --- Simulation Engine ------------------------------------------------------------
 
-    # flower-client-app
-    RUN_CLIENT_APP_ENTER = auto()
-    RUN_CLIENT_APP_LEAVE = auto()
+    # CLI: flower-simulation
+    CLI_FLOWER_SIMULATION_ENTER = auto()
+    CLI_FLOWER_SIMULATION_LEAVE = auto()
 
-    # flower-server-app
-    RUN_SERVER_APP_ENTER = auto()
-    RUN_SERVER_APP_LEAVE = auto()
+    # Python API: `run_simulation`
+    PYTHON_API_RUN_SIMULATION_ENTER = auto()
+    PYTHON_API_RUN_SIMULATION_LEAVE = auto()
 
-    # SuperNode
+    # --- Deployment Engine ------------------------------------------------------------
+
+    # CLI: `flower-superlink`
+    RUN_SUPERLINK_ENTER = auto()
+    RUN_SUPERLINK_LEAVE = auto()
+
+    # CLI: `flower-supernode`
     RUN_SUPERNODE_ENTER = auto()
     RUN_SUPERNODE_LEAVE = auto()
 
-    # SuperExec
-    RUN_SUPEREXEC_ENTER = auto()
-    RUN_SUPEREXEC_LEAVE = auto()
+    # CLI: `flower-server-app`
+    RUN_SERVER_APP_ENTER = auto()
+    RUN_SERVER_APP_LEAVE = auto()
+
+    # --- DEPRECATED -------------------------------------------------------------------
+
+    # [DEPRECATED] CLI: `flower-client-app`
+    RUN_CLIENT_APP_ENTER = auto()
+    RUN_CLIENT_APP_LEAVE = auto()
 
 
 # Use the ThreadPoolExecutor with max_workers=1 to have a queue
diff --git a/src/py/flwr/server/__init__.py b/src/py/flwr/server/__init__.py
index 896b46298327..1dde95b6b047 100644
--- a/src/py/flwr/server/__init__.py
+++ b/src/py/flwr/server/__init__.py
@@ -17,14 +17,12 @@
 
 from . import strategy
 from . import workflow as workflow
-from .app import run_superlink as run_superlink
 from .app import start_server as start_server
 from .client_manager import ClientManager as ClientManager
 from .client_manager import SimpleClientManager as SimpleClientManager
 from .compat import LegacyContext as LegacyContext
 from .driver import Driver as Driver
 from .history import History as History
-from .run_serverapp import run_server_app as run_server_app
 from .server import Server as Server
 from .server_app import ServerApp as ServerApp
 from .server_config import ServerConfig as ServerConfig
@@ -40,8 +38,6 @@
     "ServerAppComponents",
     "ServerConfig",
     "SimpleClientManager",
-    "run_server_app",
-    "run_superlink",
     "start_server",
     "strategy",
     "workflow",
diff --git a/src/py/flwr/server/compat/app.py b/src/py/flwr/server/compat/app.py
index e978359fa828..1d3e5024ba90 100644
--- a/src/py/flwr/server/compat/app.py
+++ b/src/py/flwr/server/compat/app.py
@@ -18,7 +18,6 @@
 from logging import INFO
 from typing import Optional
 
-from flwr.common import EventType, event
 from flwr.common.logger import log
 from flwr.server.client_manager import ClientManager
 from flwr.server.history import History
@@ -65,8 +64,6 @@ def start_driver(  # pylint: disable=too-many-arguments, too-many-locals
     hist : flwr.server.history.History
         Object containing training and evaluation metrics.
     """
-    event(EventType.START_DRIVER_ENTER)
-
     # Initialize the Driver API server and config
     initialized_server, initialized_config = init_defaults(
         server=server,
@@ -96,6 +93,4 @@ def start_driver(  # pylint: disable=too-many-arguments, too-many-locals
     f_stop.set()
     thread.join()
 
-    event(EventType.START_SERVER_LEAVE)
-
     return hist
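The regrouped enum members keep the established usage pattern: an ENTER event before the work, a LEAVE event after, with optional `event_details`. A hedged sketch mirroring the calls visible in `run_simulation.py` further below (the wrapper function here is hypothetical):

```python
from flwr.common import EventType, event  # import path as removed from grpc_driver.py


def run_superexec_sketch() -> None:
    """Bracket a long-running command with telemetry enter/leave events."""
    event(EventType.RUN_SUPEREXEC_ENTER)
    success = True
    try:
        ...  # do the actual work here
    except Exception:
        success = False
        raise
    finally:
        # LEAVE events may carry details, e.g. whether the run succeeded.
        event(EventType.RUN_SUPEREXEC_LEAVE, event_details={"success": success})
```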
""" - event(EventType.DRIVER_CONNECT) if self._is_connected: log(WARNING, "Already connected") return @@ -108,7 +107,6 @@ def _connect(self) -> None: def _disconnect(self) -> None: """Disconnect from the Driver API.""" - event(EventType.DRIVER_DISCONNECT) if not self._is_connected: log(DEBUG, "Already disconnected") return diff --git a/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor.py b/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor.py index 70b38f8b625e..2c58d0049849 100644 --- a/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +++ b/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor.py @@ -188,7 +188,8 @@ def _verify_hmac( self, public_key: ec.EllipticCurvePublicKey, request: Request, hmac_value: bytes ) -> bool: shared_secret = generate_shared_key(self.server_private_key, public_key) - return verify_hmac(shared_secret, request.SerializeToString(True), hmac_value) + message_bytes = request.SerializeToString(deterministic=True) + return verify_hmac(shared_secret, message_bytes, hmac_value) def _create_authenticated_node( self, diff --git a/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py b/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py index 74914be68a8f..ec7a775a5dc3 100644 --- a/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py +++ b/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py @@ -166,7 +166,7 @@ def test_successful_delete_node_with_metadata(self) -> None: self._node_private_key, self._server_public_key ) hmac_value = base64.urlsafe_b64encode( - compute_hmac(shared_secret, request.SerializeToString(True)) + compute_hmac(shared_secret, request.SerializeToString(deterministic=True)) ) public_key_bytes = base64.urlsafe_b64encode( public_key_to_bytes(self._node_public_key) @@ -195,7 +195,7 @@ def test_unsuccessful_delete_node_with_metadata(self) -> None: node_private_key, _ = generate_key_pairs() shared_secret = generate_shared_key(node_private_key, self._server_public_key) hmac_value = base64.urlsafe_b64encode( - compute_hmac(shared_secret, request.SerializeToString(True)) + compute_hmac(shared_secret, request.SerializeToString(deterministic=True)) ) public_key_bytes = base64.urlsafe_b64encode( public_key_to_bytes(self._node_public_key) @@ -222,7 +222,7 @@ def test_successful_pull_task_ins_with_metadata(self) -> None: self._node_private_key, self._server_public_key ) hmac_value = base64.urlsafe_b64encode( - compute_hmac(shared_secret, request.SerializeToString(True)) + compute_hmac(shared_secret, request.SerializeToString(deterministic=True)) ) public_key_bytes = base64.urlsafe_b64encode( public_key_to_bytes(self._node_public_key) @@ -251,7 +251,7 @@ def test_unsuccessful_pull_task_ins_with_metadata(self) -> None: node_private_key, _ = generate_key_pairs() shared_secret = generate_shared_key(node_private_key, self._server_public_key) hmac_value = base64.urlsafe_b64encode( - compute_hmac(shared_secret, request.SerializeToString(True)) + compute_hmac(shared_secret, request.SerializeToString(deterministic=True)) ) public_key_bytes = base64.urlsafe_b64encode( public_key_to_bytes(self._node_public_key) @@ -280,7 +280,7 @@ def test_successful_push_task_res_with_metadata(self) -> None: self._node_private_key, self._server_public_key ) hmac_value = base64.urlsafe_b64encode( - compute_hmac(shared_secret, request.SerializeToString(True)) + compute_hmac(shared_secret, request.SerializeToString(deterministic=True)) ) public_key_bytes = 
diff --git a/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py b/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py
index 74914be68a8f..ec7a775a5dc3 100644
--- a/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py
+++ b/src/py/flwr/server/superlink/fleet/grpc_rere/server_interceptor_test.py
@@ -166,7 +166,7 @@ def test_successful_delete_node_with_metadata(self) -> None:
             self._node_private_key, self._server_public_key
         )
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -195,7 +195,7 @@ def test_unsuccessful_delete_node_with_metadata(self) -> None:
         node_private_key, _ = generate_key_pairs()
         shared_secret = generate_shared_key(node_private_key, self._server_public_key)
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
        )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -222,7 +222,7 @@ def test_successful_pull_task_ins_with_metadata(self) -> None:
             self._node_private_key, self._server_public_key
         )
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -251,7 +251,7 @@ def test_unsuccessful_pull_task_ins_with_metadata(self) -> None:
         node_private_key, _ = generate_key_pairs()
         shared_secret = generate_shared_key(node_private_key, self._server_public_key)
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -280,7 +280,7 @@ def test_successful_push_task_res_with_metadata(self) -> None:
             self._node_private_key, self._server_public_key
         )
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -311,7 +311,7 @@ def test_unsuccessful_push_task_res_with_metadata(self) -> None:
         node_private_key, _ = generate_key_pairs()
         shared_secret = generate_shared_key(node_private_key, self._server_public_key)
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -339,7 +339,7 @@ def test_successful_get_run_with_metadata(self) -> None:
             self._node_private_key, self._server_public_key
         )
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -369,7 +369,7 @@ def test_unsuccessful_get_run_with_metadata(self) -> None:
         node_private_key, _ = generate_key_pairs()
         shared_secret = generate_shared_key(node_private_key, self._server_public_key)
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -396,7 +396,7 @@ def test_successful_ping_with_metadata(self) -> None:
             self._node_private_key, self._server_public_key
         )
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -425,7 +425,7 @@ def test_unsuccessful_ping_with_metadata(self) -> None:
         node_private_key, _ = generate_key_pairs()
         shared_secret = generate_shared_key(node_private_key, self._server_public_key)
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
@@ -469,7 +469,7 @@ def test_successful_restore_node(self) -> None:
             self._node_private_key, self._server_public_key
         )
         hmac_value = base64.urlsafe_b64encode(
-            compute_hmac(shared_secret, request.SerializeToString(True))
+            compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
         )
         public_key_bytes = base64.urlsafe_b64encode(
             public_key_to_bytes(self._node_public_key)
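Every hunk in this test file applies the same one-line change. A hypothetical helper that captures the repeated pattern; the import path and header constants are assumptions based on the interceptor code above, not verified API:

```python
import base64

# Assumed import path, mirroring client_interceptor.py's imports.
from flwr.common.secure_aggregation.crypto.symmetric_encryption import (
    compute_hmac,
    generate_shared_key,
    public_key_to_bytes,
)

_AUTH_TOKEN_HEADER = "flwr-token"       # placeholder value, assumption
_PUBLIC_KEY_HEADER = "flwr-public-key"  # placeholder value, assumption


def auth_metadata(node_private_key, server_public_key, node_public_key, request):
    """Build the (header, value) pairs every authenticated test request attaches."""
    shared_secret = generate_shared_key(node_private_key, server_public_key)
    hmac_value = base64.urlsafe_b64encode(
        compute_hmac(shared_secret, request.SerializeToString(deterministic=True))
    )
    public_key_bytes = base64.urlsafe_b64encode(public_key_to_bytes(node_public_key))
    return ((_AUTH_TOKEN_HEADER, hmac_value), (_PUBLIC_KEY_HEADER, public_key_bytes))
```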
diff --git a/src/py/flwr/simulation/run_simulation.py b/src/py/flwr/simulation/run_simulation.py
index 1eddd91108d8..38a6ee7d6c14 100644
--- a/src/py/flwr/simulation/run_simulation.py
+++ b/src/py/flwr/simulation/run_simulation.py
@@ -109,6 +109,11 @@ def run_simulation_from_cli() -> None:
     """Run Simulation Engine from the CLI."""
     args = _parse_args_run_simulation().parse_args()
 
+    event(
+        EventType.CLI_FLOWER_SIMULATION_ENTER,
+        event_details={"backend": args.backend, "num-supernodes": args.num_supernodes},
+    )
+
     # Add warnings for deprecated server_app and client_app arguments
     if args.server_app:
         warn_deprecated_feature(
@@ -214,6 +219,7 @@ def run_simulation_from_cli() -> None:
         verbose_logging=args.verbose,
         server_app_run_config=fused_config,
         is_app=is_app,
+        exit_event=EventType.CLI_FLOWER_SIMULATION_LEAVE,
     )
 
 
@@ -267,6 +273,11 @@ def run_simulation(
         When disabled, only INFO, WARNING and ERROR log messages will be shown. If
         enabled, DEBUG-level logs will be displayed.
     """
+    event(
+        EventType.PYTHON_API_RUN_SIMULATION_ENTER,
+        event_details={"backend": backend_name, "num-supernodes": num_supernodes},
+    )
+
     if enable_tf_gpu_growth:
         warn_deprecated_feature_with_example(
             "Passing `enable_tf_gpu_growth=True` is deprecated.",
@@ -284,6 +295,7 @@ def run_simulation(
         backend_config=backend_config,
         enable_tf_gpu_growth=enable_tf_gpu_growth,
         verbose_logging=verbose_logging,
+        exit_event=EventType.PYTHON_API_RUN_SIMULATION_LEAVE,
     )
 
 
@@ -367,6 +379,7 @@ def _main_loop(
     is_app: bool,
     enable_tf_gpu_growth: bool,
     run: Run,
+    exit_event: EventType,
     flwr_dir: Optional[str] = None,
     client_app: Optional[ClientApp] = None,
    client_app_attr: Optional[str] = None,
@@ -374,7 +387,7 @@ def _main_loop(
     server_app_attr: Optional[str] = None,
     server_app_run_config: Optional[UserConfig] = None,
 ) -> None:
-    """Launch SuperLink with Simulation Engine, then ServerApp on a separate thread."""
+    """Start ServerApp on a separate thread, then launch Simulation Engine."""
     # Initialize StateFactory
     state_factory = StateFactory(":flwr-in-memory-state:")
 
@@ -382,6 +395,7 @@ def _main_loop(
     # A Threading event to indicate if an exception was raised in the ServerApp thread
     server_app_thread_has_exception = threading.Event()
     serverapp_th = None
+    success = True
     try:
         # Register run
         log(DEBUG, "Pre-registering run with id %s", run.run_id)
@@ -405,8 +419,7 @@ def _main_loop(
             enable_tf_gpu_growth=enable_tf_gpu_growth,
         )
 
-        # SuperLink with Simulation Engine
-        event(EventType.RUN_SUPERLINK_ENTER)
+        # Start Simulation Engine
         vce.start_vce(
             num_supernodes=num_supernodes,
             client_app_attr=client_app_attr,
@@ -424,13 +437,13 @@ def _main_loop(
     except Exception as ex:
         log(ERROR, "An exception occurred !! %s", ex)
         log(ERROR, traceback.format_exc())
+        success = False
         raise RuntimeError("An error was encountered. Ending simulation.") from ex
     finally:
         # Trigger stop event
         f_stop.set()
-
-        event(EventType.RUN_SUPERLINK_LEAVE)
+        event(exit_event, event_details={"success": success})
         if serverapp_th:
             serverapp_th.join()
             if server_app_thread_has_exception.is_set():
@@ -442,6 +455,7 @@ def _main_loop(
 
 # pylint: disable=too-many-arguments,too-many-locals
 def _run_simulation(
     num_supernodes: int,
+    exit_event: EventType,
     client_app: Optional[ClientApp] = None,
     server_app: Optional[ServerApp] = None,
     backend_name: str = "ray",
@@ -508,6 +522,7 @@ def _run_simulation(
         is_app,
         enable_tf_gpu_growth,
         run,
+        exit_event,
         flwr_dir,
         client_app,
         client_app_attr,
diff --git a/src/py/flwr/superexec/__init__.py b/src/py/flwr/superexec/__init__.py
index a510c41f4182..0584ca663a02 100644
--- a/src/py/flwr/superexec/__init__.py
+++ b/src/py/flwr/superexec/__init__.py
@@ -13,9 +13,3 @@
 # limitations under the License.
 # ==============================================================================
 """Flower SuperExec service."""
-
-from .app import run_superexec as run_superexec
-
-__all__ = [
-    "run_superexec",
-]
diff --git a/src/py/flwr/superexec/app.py b/src/py/flwr/superexec/app.py
index 9510479ec8e1..67568b8378e0 100644
--- a/src/py/flwr/superexec/app.py
+++ b/src/py/flwr/superexec/app.py
@@ -56,7 +56,9 @@ def run_superexec() -> None:
         address=address,
         executor=_load_executor(args),
         certificates=certificates,
-        config=parse_config_args([args.executor_config]),
+        config=parse_config_args(
+            [args.executor_config] if args.executor_config else args.executor_config
+        ),
     )
     grpc_servers = [superexec_server]
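The SuperExec fix guards `parse_config_args` against a missing executor config: wrapping `None` in a list would hand the parser a bogus entry, so the value is only wrapped when it is non-empty. An illustrative sketch of the guard with a toy stand-in parser (flwr's real parser returns typed values; this one only splits strings):

```python
from typing import Dict, List, Optional


def parse_config_args(config: Optional[List[str]]) -> Dict[str, str]:
    """Toy stand-in: parse 'key=value,key2=value2' entries into a dict."""
    overrides: Dict[str, str] = {}
    for entry in config or []:  # None or [] both mean "no overrides"
        for pair in entry.split(","):
            key, _, value = pair.partition("=")
            overrides[key.strip()] = value.strip()
    return overrides


executor_config = None  # e.g. the CLI flag was not provided
config = parse_config_args(
    [executor_config] if executor_config else executor_config
)
assert config == {}  # no crash, empty config
```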