From 9d227e9bf1d124f1c276e202b1d7106fee6b94bb Mon Sep 17 00:00:00 2001
From: Chong Shen Ng
Date: Thu, 7 Nov 2024 15:35:30 +0000
Subject: [PATCH] ci(framework:skip) Add condition to spin up and kill SuperNodes (#4446)

---
Review notes (this section sits after the "---" separator and is not part of
the commit message):

 * e2e/test_exec_api.sh starts the two SuperNodes only when the third
   positional argument is "deployment-engine", so cl1_pid and cl2_pid are
   set only in that case.
 * The two cleanup guards previously read `if $3 = "deployment-engine"; then`,
   which executes the value of $3 as a command instead of comparing it. They
   now use the same `[ "$3" = "deployment-engine" ]` test as the start-up
   guard, so the SuperNodes are killed exactly when they were started.
 * Hunk line counts are unchanged by this fix; the index line of
   e2e/test_exec_api.sh still carries the original post-image blob id.

 .github/workflows/e2e.yml |  2 +-
 e2e/test_exec_api.sh      | 32 ++++++++++++++++++++------------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 5e93da349602..5323633bc274 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -67,7 +67,7 @@ jobs:
           - connection: insecure
             authentication: client-auth
     name: |
-      SuperExec /
+      Exec API /
       Python ${{ matrix.python-version }} /
       ${{ matrix.connection }} /
       ${{ matrix.authentication }} /
diff --git a/e2e/test_exec_api.sh b/e2e/test_exec_api.sh
index fd5e8c69d1de..8c2b1d5f0501 100755
--- a/e2e/test_exec_api.sh
+++ b/e2e/test_exec_api.sh
@@ -81,17 +81,19 @@ timeout 2m flower-superlink $combined_args --executor-config "$executor_config"
 sl_pid=$(pgrep -f "flower-superlink")
 sleep 2
 
-timeout 2m flower-supernode ./ $client_arg \
-    --superlink $server_address $client_auth_1 \
-    --node-config "partition-id=0 num-partitions=2" --max-retries 0 &
-cl1_pid=$!
-sleep 2
+if [ "$3" = "deployment-engine" ]; then
+  timeout 2m flower-supernode ./ $client_arg \
+      --superlink $server_address $client_auth_1 \
+      --node-config "partition-id=0 num-partitions=2" --max-retries 0 &
+  cl1_pid=$!
+  sleep 2
 
-timeout 2m flower-supernode ./ $client_arg \
-    --superlink $server_address $client_auth_2 \
-    --node-config "partition-id=1 num-partitions=2" --max-retries 0 &
-cl2_pid=$!
-sleep 2
+  timeout 2m flower-supernode ./ $client_arg \
+      --superlink $server_address $client_auth_2 \
+      --node-config "partition-id=1 num-partitions=2" --max-retries 0 &
+  cl2_pid=$!
+  sleep 2
+fi
 
 timeout 1m flwr run --run-config num-server-rounds=1 ../e2e-tmp-test e2e
 
@@ -105,7 +107,10 @@ while [ "$found_success" = false ] && [ $elapsed -lt $timeout ]; do
     if grep -q "Run finished" flwr_output.log; then
         echo "Training worked correctly!"
         found_success=true
-        kill $cl1_pid; kill $cl2_pid; sleep 1; kill $sl_pid;
+        if [ "$3" = "deployment-engine" ]; then
+          kill $cl1_pid; kill $cl2_pid;
+        fi
+        sleep 1; kill $sl_pid;
     else
         echo "Waiting for training ... ($elapsed seconds elapsed)"
     fi
@@ -116,5 +121,8 @@ done
 
 if [ "$found_success" = false ]; then
     echo "Training had an issue and timed out."
-    kill $cl1_pid; kill $cl2_pid; kill $sl_pid;
+    if [ "$3" = "deployment-engine" ]; then
+      kill $cl1_pid; kill $cl2_pid;
+    fi
+    kill $sl_pid;
 fi