From aa4442f7be0512ebe75210bd8524731a8088fc9b Mon Sep 17 00:00:00 2001 From: Heng Pan Date: Sun, 19 Jan 2025 18:47:23 +0000 Subject: [PATCH] feat(framework) Introduce `flwr_exit` function (#4801) Co-authored-by: Javier Co-authored-by: Daniel J. Beutel --- framework/docs/source/conf.py | 2 +- framework/docs/source/index.rst | 1 + framework/docs/source/ref-exit-codes-dir.rst | 27 +++++ framework/docs/source/ref-exit-codes/000.rst | 7 ++ framework/docs/source/ref-exit-codes/001.rst | 7 ++ framework/docs/source/ref-exit-codes/002.rst | 7 ++ framework/docs/source/ref-exit-codes/003.rst | 7 ++ framework/docs/source/ref-exit-codes/100.rst | 23 +++++ framework/docs/source/ref-exit-codes/300.rst | 20 ++++ framework/docs/source/ref-exit-codes/301.rst | 26 +++++ framework/docs/source/ref-exit-codes/302.rst | 32 ++++++ framework/docs/source/ref-exit-codes/500.rst | 16 +++ framework/docs/source/ref-exit-codes/501.rst | 18 ++++ framework/docs/source/ref-exit-codes/502.rst | 16 +++ .../docs/source/ref-exit-codes/_template.rst | 12 +++ src/py/flwr/client/app.py | 14 +-- src/py/flwr/client/clientapp/app.py | 10 +- src/py/flwr/client/rest_client/connection.py | 5 +- src/py/flwr/client/supernode/app.py | 24 ++--- src/py/flwr/common/constant.py | 8 -- src/py/flwr/common/exit/__init__.py | 24 +++++ src/py/flwr/common/exit/exit.py | 99 +++++++++++++++++++ src/py/flwr/common/exit/exit_code.py | 90 +++++++++++++++++ src/py/flwr/common/exit/exit_code_test.py | 54 ++++++++++ src/py/flwr/common/exit_handlers.py | 34 +++++-- src/py/flwr/server/app.py | 15 +-- src/py/flwr/server/serverapp/app.py | 10 +- .../superlink/fleet/rest_rere/rest_api.py | 5 +- 28 files changed, 548 insertions(+), 65 deletions(-) create mode 100644 framework/docs/source/ref-exit-codes-dir.rst create mode 100644 framework/docs/source/ref-exit-codes/000.rst create mode 100644 framework/docs/source/ref-exit-codes/001.rst create mode 100644 framework/docs/source/ref-exit-codes/002.rst create mode 100644 
framework/docs/source/ref-exit-codes/003.rst create mode 100644 framework/docs/source/ref-exit-codes/100.rst create mode 100644 framework/docs/source/ref-exit-codes/300.rst create mode 100644 framework/docs/source/ref-exit-codes/301.rst create mode 100644 framework/docs/source/ref-exit-codes/302.rst create mode 100644 framework/docs/source/ref-exit-codes/500.rst create mode 100644 framework/docs/source/ref-exit-codes/501.rst create mode 100644 framework/docs/source/ref-exit-codes/502.rst create mode 100644 framework/docs/source/ref-exit-codes/_template.rst create mode 100644 src/py/flwr/common/exit/__init__.py create mode 100644 src/py/flwr/common/exit/exit.py create mode 100644 src/py/flwr/common/exit/exit_code.py create mode 100644 src/py/flwr/common/exit/exit_code_test.py diff --git a/framework/docs/source/conf.py b/framework/docs/source/conf.py index 96720ed483c5..71f0f4259d9b 100644 --- a/framework/docs/source/conf.py +++ b/framework/docs/source/conf.py @@ -176,7 +176,7 @@ def find_test_modules(package_path): # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "ref-exit-codes/_template.rst"] # Sphinx redirects, implemented after the doc filename changes. # To prevent 404 errors and redirect to the new pages. diff --git a/framework/docs/source/index.rst b/framework/docs/source/index.rst index b349318a5b7e..104ea2ac3bc0 100644 --- a/framework/docs/source/index.rst +++ b/framework/docs/source/index.rst @@ -146,6 +146,7 @@ Information-oriented API reference and other reference material. 
ref-telemetry ref-changelog ref-flower-network-communication + ref-exit-codes-dir ref-faq Contributor docs diff --git a/framework/docs/source/ref-exit-codes-dir.rst b/framework/docs/source/ref-exit-codes-dir.rst new file mode 100644 index 000000000000..10789ef9cb98 --- /dev/null +++ b/framework/docs/source/ref-exit-codes-dir.rst @@ -0,0 +1,27 @@ +Exit Codes +========== + +This reference provides an index of all exit codes and recommended resolutions. + +Categories +---------- + +- **Success exit codes (0-99)**: Indicate successful completion of processes. +- **SuperLink-specific exit codes (100-199)**: Specific to ``flower-superlink`` + (SuperLink) errors. +- **ServerApp-specific exit codes (200-299)**: Specific to ``flwr-serverapp`` + (ServerApp) errors. +- **SuperNode-specific exit codes (300-399)**: Specific to ``flower-supernode`` + (SuperNode) errors. +- **ClientApp-specific exit codes (400-499)**: Specific to ``flwr-clientapp`` + (ClientApp) errors. +- **Common exit codes (500-)**: Shared across multiple components. + +Indices +------- + +.. toctree:: + :maxdepth: 1 + :glob: + + ref-exit-codes/* diff --git a/framework/docs/source/ref-exit-codes/000.rst b/framework/docs/source/ref-exit-codes/000.rst new file mode 100644 index 000000000000..1fa6bbed9179 --- /dev/null +++ b/framework/docs/source/ref-exit-codes/000.rst @@ -0,0 +1,7 @@ +[0] SUCCESS +=========== + +Description +----------- + +The process completed successfully. diff --git a/framework/docs/source/ref-exit-codes/001.rst b/framework/docs/source/ref-exit-codes/001.rst new file mode 100644 index 000000000000..0c2e3561eaa1 --- /dev/null +++ b/framework/docs/source/ref-exit-codes/001.rst @@ -0,0 +1,7 @@ +[1] GRACEFUL_EXIT_SIGINT +======================== + +Description +----------- + +The process exited gracefully, triggered by ``SIGINT``. 
diff --git a/framework/docs/source/ref-exit-codes/002.rst b/framework/docs/source/ref-exit-codes/002.rst new file mode 100644 index 000000000000..170df541b7bd --- /dev/null +++ b/framework/docs/source/ref-exit-codes/002.rst @@ -0,0 +1,7 @@ +[2] GRACEFUL_EXIT_SIGQUIT +========================= + +Description +----------- + +The process exited gracefully, triggered by ``SIGQUIT``. diff --git a/framework/docs/source/ref-exit-codes/003.rst b/framework/docs/source/ref-exit-codes/003.rst new file mode 100644 index 000000000000..4c916c46f855 --- /dev/null +++ b/framework/docs/source/ref-exit-codes/003.rst @@ -0,0 +1,7 @@ +[3] GRACEFUL_EXIT_SIGTERM +========================= + +Description +----------- + +The process exited gracefully, triggered by ``SIGTERM``. diff --git a/framework/docs/source/ref-exit-codes/100.rst b/framework/docs/source/ref-exit-codes/100.rst new file mode 100644 index 000000000000..720e48984047 --- /dev/null +++ b/framework/docs/source/ref-exit-codes/100.rst @@ -0,0 +1,23 @@ +[100] SUPERLINK_THREAD_CRASH +============================ + +Description +----------- + +A critical background thread has crashed in the SuperLink, causing it to exit +prematurely. This indicates a serious issue with the SuperLink that requires immediate +investigation. + +Critical background threads include: + +1. **Scheduler**: Schedules the execution of runs. This thread exists only when the + isolation mode is set to ``subprocess`` (default). +2. **REST API server**: Manages the REST API server. This thread exists only when the + Fleet API type is set to ``rest``. + +How to Resolve +-------------- + +1. Check the logs for any errors that may have caused the thread to crash. +2. Ensure the SuperLink's configuration is correct. +3. If the issue persists, please contact support for further assistance. 
diff --git a/framework/docs/source/ref-exit-codes/300.rst b/framework/docs/source/ref-exit-codes/300.rst new file mode 100644 index 000000000000..d28c6298005b --- /dev/null +++ b/framework/docs/source/ref-exit-codes/300.rst @@ -0,0 +1,20 @@ +[300] SUPERNODE_REST_ADDRESS_INVALID +==================================== + +Description +----------- + +The provided SuperLink address for the REST API is invalid. The address must start with +``http://`` or ``https://`` to be recognized correctly. + +How to Resolve +-------------- + +When using the REST API, ensure that the server address starts with either ``https://`` +or ``http://``. For example: + +- ``http://127.0.0.1:8080`` +- ``https://example.com:8080`` + +Verify the address in your configuration or command-line arguments and correct it as +needed. diff --git a/framework/docs/source/ref-exit-codes/301.rst b/framework/docs/source/ref-exit-codes/301.rst new file mode 100644 index 000000000000..cf9a732905ba --- /dev/null +++ b/framework/docs/source/ref-exit-codes/301.rst @@ -0,0 +1,26 @@ +[301] SUPERNODE_NODE_AUTH_KEYS_REQUIRED +======================================= + +Description +----------- + +To run the SuperNode with authentication, file paths for both the private and public +keys must be provided. Specifically, the following options must be specified: + +- ``--auth-supernode-private-key`` +- ``--auth-supernode-public-key`` + +Providing only one of these options is insufficient for authentication. + +How to Resolve +-------------- + +1. Ensure that the paths to both the private key and public key files are specified in +command-line arguments. For example: + +.. code-block:: bash + + --auth-supernode-private-key /path/to/private_key.pem + --auth-supernode-public-key /path/to/public_key.pem + +2. Verify that the specified file paths are correct and that the files exist. 
diff --git a/framework/docs/source/ref-exit-codes/302.rst b/framework/docs/source/ref-exit-codes/302.rst new file mode 100644 index 000000000000..6a7803976464 --- /dev/null +++ b/framework/docs/source/ref-exit-codes/302.rst @@ -0,0 +1,32 @@ +[302] SUPERNODE_NODE_AUTH_KEYS_INVALID +====================================== + +Description +----------- + +The provided key files are invalid. Authentication requires a valid elliptic curve +private and public key pair. This error occurs when either: + +- The **private key** file specified in ``--auth-supernode-private-key`` is invalid or + unreadable. +- The **public key** file specified in ``--auth-supernode-public-key`` is invalid or + unreadable. + +How to Resolve +-------------- + +1. Ensure that the file paths provided for the private and public key options are + correct. + +2. Verify that both files exist and contain valid elliptic curve keys. - The private key +file should be in a format compatible with elliptic curve cryptography. - The public key +file should match the private key. + +3. If the files are corrupted or not in the correct format, regenerate the elliptic +curve key pair and update the file paths accordingly. For example, in Linux/MacOS, **for +rapid prototyping only** (not production; follow company procedures for key management): + +.. code-block:: bash + + openssl ecparam -genkey -name secp384r1 -out private_key.pem + openssl ec -in private_key.pem -pubout -out public_key.pem diff --git a/framework/docs/source/ref-exit-codes/500.rst b/framework/docs/source/ref-exit-codes/500.rst new file mode 100644 index 000000000000..cc619e302fb0 --- /dev/null +++ b/framework/docs/source/ref-exit-codes/500.rst @@ -0,0 +1,16 @@ +[500] COMMON_ADDRESS_INVALID +============================ + +Description +----------- + +The provided address is invalid and cannot be parsed. It must be a valid URL, IPv4, or +IPv6 address. 
+ +How to Resolve +-------------- + +Verify that the address is correctly formatted as one of the following: + - URL (e.g., ``https://127.0.0.1:8080`` or ``https://example.com:8080``) + - IPv4 (e.g., ``192.168.1.1:9091``) + - IPv6 (e.g., ``[2001:0db8::1]:9092``) diff --git a/framework/docs/source/ref-exit-codes/501.rst b/framework/docs/source/ref-exit-codes/501.rst new file mode 100644 index 000000000000..b997e549482a --- /dev/null +++ b/framework/docs/source/ref-exit-codes/501.rst @@ -0,0 +1,18 @@ +[501] COMMON_MISSING_EXTRA_REST +=============================== + +Description +----------- + +Extra dependencies required for using the REST-based Fleet API are missing. + +How to Resolve +-------------- + +To enable the REST-based Fleet API, install ``flwr`` with the ``rest`` extra: + +.. code-block:: bash + + pip install "flwr[rest]" + +Ensure that the installation completes successfully, and then retry. diff --git a/framework/docs/source/ref-exit-codes/502.rst b/framework/docs/source/ref-exit-codes/502.rst new file mode 100644 index 000000000000..69d445caa522 --- /dev/null +++ b/framework/docs/source/ref-exit-codes/502.rst @@ -0,0 +1,16 @@ +[502] COMMON_TLS_NOT_SUPPORTED +============================== + +Description +----------- + +The ``flwr-serverapp`` and ``flwr-clientapp`` do not currently support TLS, as they are +assumed to be executed within the same network as their respective long-running +processes: ``flower-superlink`` and ``flower-supernode``. Please refer to the `Flower +Network Communication <../ref-flower-network-communication.html>`_ guide for further +details. + +How to Resolve +-------------- + +Use the ``--insecure`` flag to proceed without TLS. 
diff --git a/framework/docs/source/ref-exit-codes/_template.rst b/framework/docs/source/ref-exit-codes/_template.rst new file mode 100644 index 000000000000..07da0962ba1f --- /dev/null +++ b/framework/docs/source/ref-exit-codes/_template.rst @@ -0,0 +1,12 @@ +[] +=============== + +Description +----------- + + + +How to Resolve +-------------- + + diff --git a/src/py/flwr/client/app.py b/src/py/flwr/client/app.py index ba7f8c3f8872..15b39c470443 100644 --- a/src/py/flwr/client/app.py +++ b/src/py/flwr/client/app.py @@ -45,7 +45,6 @@ ISOLATION_MODE_PROCESS, ISOLATION_MODE_SUBPROCESS, MAX_RETRY_DELAY, - MISSING_EXTRA_REST, RUN_ID_NUM_BYTES, SERVER_OCTET, TRANSPORT_TYPE_GRPC_ADAPTER, @@ -55,6 +54,7 @@ TRANSPORT_TYPES, ErrorCode, ) +from flwr.common.exit import ExitCode, flwr_exit from flwr.common.grpc import generic_create_grpc_server from flwr.common.logger import log, warn_deprecated_feature from flwr.common.message import Error @@ -763,7 +763,10 @@ def _init_connection(transport: Optional[str], server_address: str) -> tuple[ # Parse IP address parsed_address = parse_address(server_address) if not parsed_address: - sys.exit(f"Server address ({server_address}) cannot be parsed.") + flwr_exit( + ExitCode.COMMON_ADDRESS_INVALID, + f"SuperLink address ({server_address}) cannot be parsed.", + ) host, port, is_v6 = parsed_address address = f"[{host}]:{port}" if is_v6 else f"{host}:{port}" @@ -778,12 +781,9 @@ def _init_connection(transport: Optional[str], server_address: str) -> tuple[ from .rest_client.connection import http_request_response except ModuleNotFoundError: - sys.exit(MISSING_EXTRA_REST) + flwr_exit(ExitCode.COMMON_MISSING_EXTRA_REST) if server_address[:4] != "http": - sys.exit( - "When using the REST API, please provide `https://` or " - "`http://` before the server address (e.g. 
`http://127.0.0.1:8080`)" - ) + flwr_exit(ExitCode.SUPERNODE_REST_ADDRESS_INVALID) connection, error_type = http_request_response, RequestsConnectionError elif transport == TRANSPORT_TYPE_GRPC_RERE: connection, error_type = grpc_request_response, RpcError diff --git a/src/py/flwr/client/clientapp/app.py b/src/py/flwr/client/clientapp/app.py index 32813205478a..6561c5f855ac 100644 --- a/src/py/flwr/client/clientapp/app.py +++ b/src/py/flwr/client/clientapp/app.py @@ -16,7 +16,6 @@ import argparse -import sys import time from logging import DEBUG, ERROR, INFO from typing import Optional @@ -29,6 +28,7 @@ from flwr.common.args import add_args_flwr_app_common from flwr.common.config import get_flwr_dir from flwr.common.constant import CLIENTAPPIO_API_DEFAULT_CLIENT_ADDRESS, ErrorCode +from flwr.common.exit import ExitCode, flwr_exit from flwr.common.grpc import create_channel from flwr.common.logger import log from flwr.common.message import Error @@ -61,12 +61,10 @@ def flwr_clientapp() -> None: """Run process-isolated Flower ClientApp.""" args = _parse_args_run_flwr_clientapp().parse_args() if not args.insecure: - log( - ERROR, - "flwr-clientapp does not support TLS yet. 
" - "Please use the '--insecure' flag.", + flwr_exit( + ExitCode.COMMON_TLS_NOT_SUPPORTED, + "flwr-clientapp does not support TLS yet.", ) - sys.exit(1) log(INFO, "Starting Flower ClientApp") log( diff --git a/src/py/flwr/client/rest_client/connection.py b/src/py/flwr/client/rest_client/connection.py index 803170770da9..74ab3321164e 100644 --- a/src/py/flwr/client/rest_client/connection.py +++ b/src/py/flwr/client/rest_client/connection.py @@ -16,7 +16,6 @@ import random -import sys import threading from collections.abc import Iterator from contextlib import contextmanager @@ -32,12 +31,12 @@ from flwr.client.message_handler.message_handler import validate_out_message from flwr.common import GRPC_MAX_MESSAGE_LENGTH from flwr.common.constant import ( - MISSING_EXTRA_REST, PING_BASE_MULTIPLIER, PING_CALL_TIMEOUT, PING_DEFAULT_INTERVAL, PING_RANDOM_RANGE, ) +from flwr.common.exit import ExitCode, flwr_exit from flwr.common.logger import log from flwr.common.message import Message, Metadata from flwr.common.retry_invoker import RetryInvoker @@ -62,7 +61,7 @@ try: import requests except ModuleNotFoundError: - sys.exit(MISSING_EXTRA_REST) + flwr_exit(ExitCode.COMMON_MISSING_EXTRA_REST) PATH_CREATE_NODE: str = "api/v0/fleet/create-node" diff --git a/src/py/flwr/client/supernode/app.py b/src/py/flwr/client/supernode/app.py index c7c9841bdb59..f94263cebb9b 100644 --- a/src/py/flwr/client/supernode/app.py +++ b/src/py/flwr/client/supernode/app.py @@ -40,6 +40,7 @@ TRANSPORT_TYPE_GRPC_RERE, TRANSPORT_TYPE_REST, ) +from flwr.common.exit import ExitCode, flwr_exit from flwr.common.exit_handlers import register_exit_handlers from flwr.common.logger import log, warn_deprecated_feature @@ -89,6 +90,7 @@ def run_supernode() -> None: # Register handlers for graceful shutdown register_exit_handlers( event_type=EventType.RUN_SUPERNODE_LEAVE, + exit_message="SuperNode terminated gracefully.", ) start_client_internal( @@ -280,11 +282,7 @@ def _try_setup_client_authentication( return 
None if not args.auth_supernode_private_key or not args.auth_supernode_public_key: - sys.exit( - "Authentication requires file paths to both " - "'--auth-supernode-private-key' and '--auth-supernode-public-key'" - "to be provided (providing only one of them is not sufficient)." - ) + flwr_exit(ExitCode.SUPERNODE_NODE_AUTH_KEYS_REQUIRED) try: ssh_private_key = load_ssh_private_key( @@ -294,11 +292,9 @@ def _try_setup_client_authentication( if not isinstance(ssh_private_key, ec.EllipticCurvePrivateKey): raise ValueError() except (ValueError, UnsupportedAlgorithm): - sys.exit( - "Error: Unable to parse the private key file in " - "'--auth-supernode-private-key'. Authentication requires elliptic " - "curve private and public key pair. Please ensure that the file " - "path points to a valid private key file and try again." + flwr_exit( + ExitCode.SUPERNODE_NODE_AUTH_KEYS_INVALID, + "Unable to parse the private key file.", ) try: @@ -308,11 +304,9 @@ def _try_setup_client_authentication( if not isinstance(ssh_public_key, ec.EllipticCurvePublicKey): raise ValueError() except (ValueError, UnsupportedAlgorithm): - sys.exit( - "Error: Unable to parse the public key file in " - "'--auth-supernode-public-key'. Authentication requires elliptic " - "curve private and public key pair. Please ensure that the file " - "path points to a valid public key file and try again." + flwr_exit( + ExitCode.SUPERNODE_NODE_AUTH_KEYS_INVALID, + "Unable to parse the public key file.", ) return ( diff --git a/src/py/flwr/common/constant.py b/src/py/flwr/common/constant.py index f6e1da95538f..e0d31da2bddb 100644 --- a/src/py/flwr/common/constant.py +++ b/src/py/flwr/common/constant.py @@ -17,14 +17,6 @@ from __future__ import annotations -MISSING_EXTRA_REST = """ -Extra dependencies required for using the REST-based Fleet API are missing. - -To use the REST API, install `flwr` with the `rest` extra: - - `pip install flwr[rest]`. 
-""" - TRANSPORT_TYPE_GRPC_BIDI = "grpc-bidi" TRANSPORT_TYPE_GRPC_RERE = "grpc-rere" TRANSPORT_TYPE_GRPC_ADAPTER = "grpc-adapter" diff --git a/src/py/flwr/common/exit/__init__.py b/src/py/flwr/common/exit/__init__.py new file mode 100644 index 000000000000..46b5381fc0e1 --- /dev/null +++ b/src/py/flwr/common/exit/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Flower exit functionality.""" + + +from .exit import flwr_exit +from .exit_code import ExitCode + +__all__ = [ + "ExitCode", + "flwr_exit", +] diff --git a/src/py/flwr/common/exit/exit.py b/src/py/flwr/common/exit/exit.py new file mode 100644 index 000000000000..29072b00b423 --- /dev/null +++ b/src/py/flwr/common/exit/exit.py @@ -0,0 +1,99 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Unified exit function.""" + + +from __future__ import annotations + +import sys +from logging import ERROR, INFO +from typing import Any, NoReturn + +from flwr.common import EventType, event + +from ..logger import log +from .exit_code import EXIT_CODE_HELP + +HELP_PAGE_URL = "https://flower.ai/docs/framework/ref-exit-codes/" + + +def flwr_exit( + code: int, + message: str | None = None, + event_type: EventType | None = None, + event_details: dict[str, Any] | None = None, +) -> NoReturn: + """Handle application exit with an optional message. + + The exit message logged and displayed will follow this structure: + + >>> Exit Code: <code> + >>> <message> + >>> <short-help-message> + >>> + >>> For more information, visit: <help-page-url> + + - `<code>`: The unique exit code representing the termination reason. + - `<message>`: Optional context or additional information about the exit. + - `<short-help-message>`: A brief explanation for the given exit code. + - `<help-page-url>`: A URL providing detailed documentation and resolution steps. 
+ """ + is_error = not 0 <= code < 100 # 0-99 are success exit codes + + # Construct exit message + exit_message = f"Exit Code: {code}\n" if is_error else "" + exit_message += message or "" + if short_help_message := EXIT_CODE_HELP.get(code, ""): + exit_message += f"\n{short_help_message}" + + # Set log level and system exit code + log_level = ERROR if is_error else INFO + sys_exit_code = 1 if is_error else 0 + + # Add help URL for non-successful/graceful exits + if is_error: + help_url = f"{HELP_PAGE_URL}{code}.html" + exit_message += f"\n\nFor more information, visit: <{help_url}>" + + # Telemetry event + event_type = event_type or _try_obtain_telemetry_event() + if event_type: + event_details = event_details or {} + event_details["exit_code"] = code + event(event_type, event_details).result() + + # Log the exit message + log(log_level, exit_message) + + # Exit + sys.exit(sys_exit_code) + + +# pylint: disable-next=too-many-return-statements +def _try_obtain_telemetry_event() -> EventType | None: + """Try to obtain a telemetry event.""" + if sys.argv[0].endswith("flower-superlink"): + return EventType.RUN_SUPERLINK_LEAVE + if sys.argv[0].endswith("flower-supernode"): + return EventType.RUN_SUPERNODE_LEAVE + if sys.argv[0].endswith("flwr-serverapp"): + return EventType.FLWR_SERVERAPP_RUN_LEAVE + if sys.argv[0].endswith("flwr-clientapp"): + return None # Not yet implemented + if sys.argv[0].endswith("flwr-simulation"): + return EventType.FLWR_SIMULATION_RUN_LEAVE + if sys.argv[0].endswith("flower-simulation"): + return EventType.CLI_FLOWER_SIMULATION_LEAVE + return None diff --git a/src/py/flwr/common/exit/exit_code.py b/src/py/flwr/common/exit/exit_code.py new file mode 100644 index 000000000000..a04b5ff14470 --- /dev/null +++ b/src/py/flwr/common/exit/exit_code.py @@ -0,0 +1,90 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Exit codes.""" + + +from __future__ import annotations + + +class ExitCode: + """Exit codes for Flower components.""" + + # Success exit codes (0-99) + SUCCESS = 0 # Successful exit without any errors or signals + GRACEFUL_EXIT_SIGINT = 1 # Graceful exit triggered by SIGINT + GRACEFUL_EXIT_SIGQUIT = 2 # Graceful exit triggered by SIGQUIT + GRACEFUL_EXIT_SIGTERM = 3 # Graceful exit triggered by SIGTERM + + # SuperLink-specific exit codes (100-199) + SUPERLINK_THREAD_CRASH = 100 + + # ServerApp-specific exit codes (200-299) + + # SuperNode-specific exit codes (300-399) + SUPERNODE_REST_ADDRESS_INVALID = 300 + SUPERNODE_NODE_AUTH_KEYS_REQUIRED = 301 + SUPERNODE_NODE_AUTH_KEYS_INVALID = 302 + + # ClientApp-specific exit codes (400-499) + + # Common exit codes (500-) + COMMON_ADDRESS_INVALID = 500 + COMMON_MISSING_EXTRA_REST = 501 + COMMON_TLS_NOT_SUPPORTED = 502 + + def __new__(cls) -> ExitCode: + """Prevent instantiation.""" + raise TypeError(f"{cls.__name__} cannot be instantiated.") + + +# All short help messages for exit codes +EXIT_CODE_HELP = { + # Success exit codes (0-99) + ExitCode.SUCCESS: "", + ExitCode.GRACEFUL_EXIT_SIGINT: "", + ExitCode.GRACEFUL_EXIT_SIGQUIT: "", + ExitCode.GRACEFUL_EXIT_SIGTERM: "", + # SuperLink-specific exit codes (100-199) + ExitCode.SUPERLINK_THREAD_CRASH: "An important background thread has crashed.", + # 
ServerApp-specific exit codes (200-299) + # SuperNode-specific exit codes (300-399) + ExitCode.SUPERNODE_REST_ADDRESS_INVALID: ( + "When using the REST API, please provide `https://` or " + "`http://` before the server address (e.g. `http://127.0.0.1:8080`)" + ), + ExitCode.SUPERNODE_NODE_AUTH_KEYS_REQUIRED: ( + "Node authentication requires file paths to both " + "'--auth-supernode-private-key' and '--auth-supernode-public-key' " + "to be provided (providing only one of them is not sufficient)." + ), + ExitCode.SUPERNODE_NODE_AUTH_KEYS_INVALID: ( + "Node authentication requires elliptic curve private and public key pair. " + "Please ensure that the file path points to a valid private/public key " + "file and try again." + ), + # ClientApp-specific exit codes (400-499) + # Common exit codes (500-) + ExitCode.COMMON_ADDRESS_INVALID: ( + "Please provide a valid URL, IPv4 or IPv6 address." + ), + ExitCode.COMMON_MISSING_EXTRA_REST: """ +Extra dependencies required for using the REST-based Fleet API are missing. + +To use the REST API, install `flwr` with the `rest` extra: + + `pip install "flwr[rest]"`. +""", + ExitCode.COMMON_TLS_NOT_SUPPORTED: "Please use the '--insecure' flag.", +} diff --git a/src/py/flwr/common/exit/exit_code_test.py b/src/py/flwr/common/exit/exit_code_test.py new file mode 100644 index 000000000000..348ad224fe6d --- /dev/null +++ b/src/py/flwr/common/exit/exit_code_test.py @@ -0,0 +1,54 @@ +# Copyright 2025 Flower Labs GmbH. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for exit codes.""" + + +from pathlib import Path + +from .exit_code import EXIT_CODE_HELP, ExitCode + + +def test_exit_code_help_exist() -> None: + """Test if all exit codes have help message.""" + for name, code in ExitCode.__dict__.items(): + if name.startswith("__"): + continue + assert ( + code in EXIT_CODE_HELP + ), f"Exit code {name} ({code}) does not have help message." + + +def test_exit_code_help_url_exist() -> None: + """Test if all exit codes have help URL.""" + # Get all exit code help URLs + dir_path = Path("framework/docs/source/ref-exit-codes") + files = {int(f.stem): f for f in dir_path.glob("*.rst") if f.stem.isdigit()} + + # Check if all exit codes + for name, code in ExitCode.__dict__.items(): + if name.startswith("__"): + continue + + # Assert file exists + assert code in files, f"Exit code {name} ({code}) does not have help URL." 
+ + # Retrieve the title from the help URL + f = files[code] + title = f.read_text().split("\n")[0] + + # Assert the title is correct + assert ( + title == f"[{code}] {name}" + ), f"Exit code {name} ({code}) help URL has incorrect title in {str(f)}" diff --git a/src/py/flwr/common/exit_handlers.py b/src/py/flwr/common/exit_handlers.py index e5898b46a537..8e8054793cee 100644 --- a/src/py/flwr/common/exit_handlers.py +++ b/src/py/flwr/common/exit_handlers.py @@ -15,28 +15,38 @@ """Common function to register exit handlers for server and client.""" -import sys -from signal import SIGINT, SIGTERM, signal +from signal import SIGINT, SIGQUIT, SIGTERM, signal from threading import Thread from types import FrameType from typing import Optional from grpc import Server -from flwr.common.telemetry import EventType, event +from flwr.common.telemetry import EventType + +from .exit import ExitCode, flwr_exit + +SIGNAL_TO_EXIT_CODE = { + SIGINT: ExitCode.GRACEFUL_EXIT_SIGINT, + SIGQUIT: ExitCode.GRACEFUL_EXIT_SIGQUIT, + SIGTERM: ExitCode.GRACEFUL_EXIT_SIGTERM, +} def register_exit_handlers( event_type: EventType, + exit_message: Optional[str] = None, grpc_servers: Optional[list[Server]] = None, bckg_threads: Optional[list[Thread]] = None, ) -> None: - """Register exit handlers for `SIGINT` and `SIGTERM` signals. + """Register exit handlers for `SIGINT`, `SIGTERM` and `SIGQUIT` signals. Parameters ---------- event_type : EventType The telemetry event that should be logged before exit. + exit_message : Optional[str] (default: None) + The message to be logged before exiting. grpc_servers: Optional[List[Server]] (default: None) An otpional list of gRPC servers that need to be gracefully terminated before exiting. 
@@ -46,6 +56,7 @@ def register_exit_handlers( """ default_handlers = { SIGINT: None, + SIGQUIT: None, SIGTERM: None, } @@ -61,8 +72,6 @@ def graceful_exit_handler( # type: ignore # Reset to default handler signal(signalnum, default_handlers[signalnum]) - event_res = event(event_type=event_type) - if grpc_servers is not None: for grpc_server in grpc_servers: grpc_server.stop(grace=1) @@ -71,16 +80,21 @@ def graceful_exit_handler( # type: ignore for bckg_thread in bckg_threads: bckg_thread.join() - # Ensure event has happend - event_res.result() - # Setup things for graceful exit - sys.exit(0) + flwr_exit( + code=SIGNAL_TO_EXIT_CODE[signalnum], + message=exit_message, + event_type=event_type, + ) default_handlers[SIGINT] = signal( # type: ignore SIGINT, graceful_exit_handler, # type: ignore ) + default_handlers[SIGQUIT] = signal( # type: ignore + SIGQUIT, + graceful_exit_handler, # type: ignore + ) default_handlers[SIGTERM] = signal( # type: ignore SIGTERM, graceful_exit_handler, # type: ignore diff --git a/src/py/flwr/server/app.py b/src/py/flwr/server/app.py index e8c8b73f642d..a841cedd6970 100644 --- a/src/py/flwr/server/app.py +++ b/src/py/flwr/server/app.py @@ -52,7 +52,6 @@ FLEET_API_REST_DEFAULT_ADDRESS, ISOLATION_MODE_PROCESS, ISOLATION_MODE_SUBPROCESS, - MISSING_EXTRA_REST, SERVER_OCTET, SERVERAPPIO_API_DEFAULT_SERVER_ADDRESS, SIMULATIONIO_API_DEFAULT_SERVER_ADDRESS, @@ -60,6 +59,7 @@ TRANSPORT_TYPE_GRPC_RERE, TRANSPORT_TYPE_REST, ) +from flwr.common.exit import ExitCode, flwr_exit from flwr.common.exit_handlers import register_exit_handlers from flwr.common.grpc import generic_create_grpc_server from flwr.common.logger import log, warn_deprecated_feature @@ -345,7 +345,7 @@ def run_superlink() -> None: and importlib.util.find_spec("starlette") and importlib.util.find_spec("uvicorn") ) is None: - sys.exit(MISSING_EXTRA_REST) + flwr_exit(ExitCode.COMMON_MISSING_EXTRA_REST) _, ssl_certfile, ssl_keyfile = ( certificates if certificates is not None else (None, 
None, None) @@ -437,6 +437,7 @@ def run_superlink() -> None: # Graceful shutdown register_exit_handlers( event_type=EventType.RUN_SUPERLINK_LEAVE, + exit_message="SuperLink terminated gracefully.", grpc_servers=grpc_servers, ) @@ -445,7 +446,8 @@ def run_superlink() -> None: sleep(0.1) # Exit if any thread has exited prematurely - sys.exit(1) + # This code will not be reached if the SuperLink stops gracefully + flwr_exit(ExitCode.SUPERLINK_THREAD_CRASH) def _run_flwr_command(args: list[str], main_pid: int) -> None: @@ -520,8 +522,9 @@ def _flwr_scheduler( def _format_address(address: str) -> tuple[str, str, int]: parsed_address = parse_address(address) if not parsed_address: - sys.exit( - f"Address ({address}) cannot be parsed (expected: URL or IPv4 or IPv6)." + flwr_exit( + ExitCode.COMMON_ADDRESS_INVALID, + f"Address ({address}) cannot be parsed.", ) host, port, is_v6 = parsed_address return (f"[{host}]:{port}" if is_v6 else f"{host}:{port}", host, port) @@ -712,7 +715,7 @@ def _run_fleet_api_rest( from flwr.server.superlink.fleet.rest_rere.rest_api import app as fast_api_app except ModuleNotFoundError: - sys.exit(MISSING_EXTRA_REST) + flwr_exit(ExitCode.COMMON_MISSING_EXTRA_REST) log(INFO, "Starting Flower REST server") diff --git a/src/py/flwr/server/serverapp/app.py b/src/py/flwr/server/serverapp/app.py index 35e1df92ab67..626c1bc71918 100644 --- a/src/py/flwr/server/serverapp/app.py +++ b/src/py/flwr/server/serverapp/app.py @@ -16,7 +16,6 @@ import argparse -import sys from logging import DEBUG, ERROR, INFO from pathlib import Path from queue import Queue @@ -38,6 +37,7 @@ Status, SubStatus, ) +from flwr.common.exit import ExitCode, flwr_exit from flwr.common.logger import ( log, mirror_output_to_queue, @@ -75,12 +75,10 @@ def flwr_serverapp() -> None: log(INFO, "Starting Flower ServerApp") if not args.insecure: - log( - ERROR, - "`flwr-serverapp` does not support TLS yet. 
" - "Please use the '--insecure' flag.", + flwr_exit( + ExitCode.COMMON_TLS_NOT_SUPPORTED, + "`flwr-serverapp` does not support TLS yet.", ) - sys.exit(1) log( DEBUG, diff --git a/src/py/flwr/server/superlink/fleet/rest_rere/rest_api.py b/src/py/flwr/server/superlink/fleet/rest_rere/rest_api.py index 692db3930f60..91abe7639c1c 100644 --- a/src/py/flwr/server/superlink/fleet/rest_rere/rest_api.py +++ b/src/py/flwr/server/superlink/fleet/rest_rere/rest_api.py @@ -17,13 +17,12 @@ from __future__ import annotations -import sys from collections.abc import Awaitable from typing import Callable, TypeVar, cast from google.protobuf.message import Message as GrpcMessage -from flwr.common.constant import MISSING_EXTRA_REST +from flwr.common.exit import ExitCode, flwr_exit from flwr.proto.fab_pb2 import GetFabRequest, GetFabResponse # pylint: disable=E0611 from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611 CreateNodeRequest, @@ -55,7 +54,7 @@ from starlette.responses import Response from starlette.routing import Route except ModuleNotFoundError: - sys.exit(MISSING_EXTRA_REST) + flwr_exit(ExitCode.COMMON_MISSING_EXTRA_REST) GrpcRequest = TypeVar("GrpcRequest", bound=GrpcMessage)