Skip to content

Commit

Permalink
Merge branch 'main' into add-flower-datasets-glossary-entry
Browse files Browse the repository at this point in the history
  • Loading branch information
flwrmachine authored Sep 27, 2024
2 parents 23ea568 + 86b75f0 commit 69920f2
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 0 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,55 @@ jobs:
short_sha: ${{ steps.upload.outputs.SHORT_SHA }}
dir: ${{ steps.upload.outputs.DIR }}

superexec:
  # End-to-end SuperExec job: installs the Flower wheel located via the
  # `wheel` job's outputs and runs e2e/test_superexec.sh once per matrix
  # combination.
  name: |
    SuperExec /
    Python ${{ matrix.python-version }} /
    ${{ matrix.connection }} /
    ${{ matrix.authentication }} /
    ${{ matrix.engine }}
  needs: wheel
  runs-on: ubuntu-22.04
  timeout-minutes: 10
  strategy:
    # Let the remaining combinations finish even if one of them fails.
    fail-fast: false
    matrix:
      # Versions are quoted so that "3.10" is not parsed as the float 3.1.
      python-version:
        - "3.9"
        - "3.10"
        - "3.11"
      directory:
        - e2e-bare-auth
      connection:
        - secure
        - insecure
      engine:
        - deployment-engine
        - simulation-engine
      authentication:
        - no-auth
        - client-auth
      # client-auth is never combined with an insecure connection.
      # NOTE(review): presumably because client authentication needs TLS;
      # confirm.
      exclude:
        - connection: insecure
          authentication: client-auth
  defaults:
    run:
      # Every `run` step of this job starts in the selected e2e directory.
      working-directory: e2e/${{ matrix.directory }}
  steps:
    - uses: actions/checkout@v4
    - name: Bootstrap
      uses: ./.github/actions/bootstrap
      with:
        python-version: ${{ matrix.python-version }}
        poetry-skip: 'true'
    # Runs only in adap/flower, for non-fork pull requests, and when the actor
    # is not dependabot.
    # NOTE(review): no alternative install step is visible in this job for the
    # other cases; confirm how `flwr` gets installed there.
    - name: Download and install Flower wheel from artifact store
      if: ${{ github.repository == 'adap/flower' && !github.event.pull_request.head.repo.fork && github.actor != 'dependabot[bot]' }}
      run: |
        # Define base URL for wheel file
        WHEEL_URL="https://${{ env.ARTIFACT_BUCKET }}/py/${{ needs.wheel.outputs.dir }}/${{ needs.wheel.outputs.short_sha }}/${{ needs.wheel.outputs.whl_path }}"
        if [[ "${{ matrix.engine }}" == "simulation-engine" ]]; then
          python -m pip install "flwr[simulation] @ ${WHEEL_URL}"
        else
          python -m pip install "${WHEEL_URL}"
        fi
    # The script receives connection, authentication and engine as $1, $2, $3.
    - name: >
        Run SuperExec test /
        ${{ matrix.connection }} /
        ${{ matrix.authentication }} /
        ${{ matrix.engine }}
      # Same value as the job-level default above.
      working-directory: e2e/${{ matrix.directory }}
      run: ./../test_superexec.sh "${{ matrix.connection }}" "${{ matrix.authentication}}" "${{ matrix.engine }}"
frameworks:
runs-on: ubuntu-22.04
timeout-minutes: 10
Expand Down
1 change: 1 addition & 0 deletions e2e/e2e-bare-auth/certificate.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ subjectAltName = @alt_names
DNS.1 = localhost
IP.1 = ::1
IP.2 = 127.0.0.1
IP.3 = 0.0.0.0
122 changes: 122 additions & 0 deletions e2e/test_superexec.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/bin/bash
set -e

# Positional arguments:
#   $1  connection      "secure" or "insecure"
#   $2  authentication  "client-auth" enables key-based authentication;
#                       any other value disables it
#   $3  engine          "deployment-engine" or "simulation-engine"
#
# The option strings assembled here are expanded unquoted by the commands that
# launch the Flower processes, so the newlines embedded in them only separate
# words.

# --- Connectivity parameters ($1) -------------------------------------------
if [[ "$1" == "secure" ]]; then
  # NOTE(review): presumably creates the certificates and keys referenced
  # below; confirm against generate.sh.
  ./generate.sh
  server_arg='--ssl-ca-certfile ../certificates/ca.crt'
  server_arg+=$'\n''--ssl-certfile ../certificates/server.pem'
  server_arg+=$'\n''--ssl-keyfile ../certificates/server.key'
  client_arg='--root-certificates ../certificates/ca.crt'
  # The double quotes around the path are literal characters of the value;
  # the whole option is appended after the server TLS options.
  superexec_arg="$server_arg "'--executor-config root-certificates="../certificates/ca.crt"'
elif [[ "$1" == "insecure" ]]; then
  server_arg='--insecure'
  client_arg=$server_arg
  superexec_arg=$server_arg
fi

# --- Authentication parameters ($2) -----------------------------------------
# The SuperLink address is the same with and without authentication.
server_address='127.0.0.1:9092'
if [[ "$2" == "client-auth" ]]; then
  server_auth='--auth-list-public-keys ../keys/client_public_keys.csv'
  server_auth+=$'\n''--auth-superlink-private-key ../keys/server_credentials'
  server_auth+=$'\n''--auth-superlink-public-key ../keys/server_credentials.pub'
  client_auth_1='--auth-supernode-private-key ../keys/client_credentials_1'
  client_auth_1+=$'\n''--auth-supernode-public-key ../keys/client_credentials_1.pub'
  client_auth_2='--auth-supernode-private-key ../keys/client_credentials_2'
  client_auth_2+=$'\n''--auth-supernode-public-key ../keys/client_credentials_2.pub'
else
  server_auth=''
  client_auth_1=''
  client_auth_2=''
fi

# --- Engine ($3) ------------------------------------------------------------
if [[ "$3" == "deployment-engine" ]]; then
  superexec_engine_arg='--executor flwr.superexec.deployment:executor'
elif [[ "$3" == "simulation-engine" ]]; then
  superexec_engine_arg='--executor flwr.superexec.simulation:executor'
  superexec_engine_arg+=$'\n''--executor-config num-supernodes=10'
fi


# Scaffold a throwaway Flower app from the numpy template and work inside it.
flwr new e2e-tmp-test --framework numpy --username flwrlabs
cd e2e-tmp-test

# Delete the `flwr[simulation]` requirement line from pyproject.toml; it seems
# necessary so that it does not override the already installed wheel.
case "$OSTYPE" in
  darwin*)
    # macOS: `sed -i` takes an explicit (here empty) backup suffix.
    sed -i '' '/flwr\[simulation\]/d' pyproject.toml
    ;;
  *)
    # Everything else (Linux).
    sed -i '/flwr\[simulation\]/d' pyproject.toml
    ;;
esac
pip install -e . --no-deps

# Append the `superexec` federation to pyproject.toml. Only the last line
# depends on the connection mode given as $1.
if [ "$1" == "insecure" ]; then
  federation_security='insecure = true'
else
  federation_security='root-certificates = "../certificates/ca.crt"'
fi
printf '\n[tool.flwr.federations.superexec]\naddress = "127.0.0.1:9093"\n%s\n' \
  "$federation_security" >> pyproject.toml

# Stop every background process started below: SuperNodes first, then, after a
# short pause, SuperLink and SuperExec. Errors are ignored because a process
# may already be gone (for example when its `timeout` expired), and under
# `set -e` a failing `kill` would otherwise abort the script.
cleanup() {
  kill ${cl1_pid:-} ${cl2_pid:-} 2>/dev/null || true
  sleep 1
  kill ${sl_pid:-} ${se_pid:-} 2>/dev/null || true
}
# Run the cleanup on every exit path, including an early abort caused by
# `set -e` (e.g. when `flwr run` fails). The script's exit status is kept.
trap cleanup EXIT

# The *_arg / *_auth variables are intentionally expanded unquoted so that
# each of them splits into separate command-line words.

# SuperLink
timeout 2m flower-superlink $server_arg $server_auth &
sl_pid=$!
sleep 2

# First SuperNode
timeout 2m flower-supernode ./ $client_arg \
  --superlink $server_address $client_auth_1 \
  --node-config "partition-id=0 num-partitions=2" --max-retries 0 &
cl1_pid=$!
sleep 2

# Second SuperNode
timeout 2m flower-supernode ./ $client_arg \
  --superlink $server_address $client_auth_2 \
  --node-config "partition-id=1 num-partitions=2" --max-retries 0 &
cl2_pid=$!
sleep 2

# SuperExec: output is shown on the console and copied to flwr_output.log.
# Process substitution is used instead of `| tee` so that `$!` is the PID of
# the backgrounded `timeout` command itself; no racy `pgrep` lookup is needed.
timeout 2m flower-superexec $superexec_arg $superexec_engine_arg \
  > >(tee flwr_output.log) 2>&1 &
se_pid=$!
sleep 2

# Submit the run to the `superexec` federation configured in pyproject.toml.
timeout 1m flwr run --run-config num-server-rounds=1 ../e2e-tmp-test superexec

# Initialize a flag to track if training is successful
found_success=false
timeout=120  # Timeout after 120 seconds
elapsed=0

# Poll the SuperExec log for "Run finished" until the timeout is reached
while [ "$elapsed" -lt "$timeout" ]; do
  if grep -q "Run finished" flwr_output.log; then
    echo "Training worked correctly!"
    found_success=true
    break
  fi
  echo "Waiting for training ... ($elapsed seconds elapsed)"
  # Sleep for a short period and increment the elapsed time
  sleep 2
  elapsed=$((elapsed + 2))
done

if [ "$found_success" = false ]; then
  echo "Training had an issue and timed out."
  # Fail the test explicitly; otherwise CI would report success even though
  # the run never finished.
  exit 1
fi

0 comments on commit 69920f2

Please sign in to comment.