Skip to content

fix: container timeout issue and new environment variables #4171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changelog.d/4171.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
container timeout issue and new environment variables
4 changes: 4 additions & 0 deletions doc/source/contributing/environment_variables.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ Following is a list of environment variables that can be set to control various
- Specifies the port of the Fluent server in :func:`connect_to_fluent() <ansys.fluent.core.launcher.launcher.connect_to_fluent>`.
* - PYFLUENT_FLUENT_ROOT
- Specifies the Fluent root directory while launching Fluent in :func:`launch_fluent() <ansys.fluent.core.launcher.launcher.launch_fluent>`.
* - PYFLUENT_FLUENT_LAUNCH_TIMEOUT
- Specifies the timeout, in seconds, for launching Fluent through :func:`launch_fluent() <ansys.fluent.core.launcher.launcher.launch_fluent>` when the ``start_timeout`` argument is not specified. Defaults to 60 seconds.
* - PYFLUENT_FLUENT_AUTOMATIC_TRANSCRIPT
- Enables automatic writing of the transcript (``.trn``) file by Fluent. Disabled by default.
* - PYFLUENT_GRPC_LOG_BYTES_LIMIT
- Specifies the length of gRPC logging messages. Set to 0 to disable the limit.
* - PYFLUENT_LAUNCH_CONTAINER
Expand Down
4 changes: 2 additions & 2 deletions src/ansys/fluent/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
SolverIcing,
)
from ansys.fluent.core.streaming_services.events_streaming import * # noqa: F401, F403
from ansys.fluent.core.utils import fldoc, get_examples_download_dir
from ansys.fluent.core.utils import env_var_to_bool, fldoc, get_examples_download_dir
from ansys.fluent.core.utils.fluent_version import FluentVersion # noqa: F401
from ansys.fluent.core.utils.setup_for_fluent import setup_for_fluent # noqa: F401

Expand Down Expand Up @@ -155,7 +155,7 @@ def version_info() -> str:
FLUENT_SHOW_MESH_AFTER_CASE_READ = False

# Whether to write the automatic transcript in Fluent
FLUENT_AUTOMATIC_TRANSCRIPT = False
FLUENT_AUTOMATIC_TRANSCRIPT = env_var_to_bool("PYFLUENT_FLUENT_AUTOMATIC_TRANSCRIPT")

# Whether to interrupt Fluent solver from PyFluent
SUPPORT_SOLVER_INTERRUPT = False
Expand Down
5 changes: 3 additions & 2 deletions src/ansys/fluent/core/codegen/tuigen.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
convert_path_to_grpc_path,
convert_tui_menu_to_func_name,
)
from ansys.fluent.core.utils import env_var_to_bool
from ansys.fluent.core.utils.fix_doc import escape_wildcards
from ansys.fluent.core.utils.fluent_version import (
FluentVersion,
Expand Down Expand Up @@ -95,7 +96,7 @@ def _get_tui_docdir(mode: str):


def _copy_tui_help_xml_file(version: str):
if os.getenv("PYFLUENT_LAUNCH_CONTAINER") == "1":
if env_var_to_bool("PYFLUENT_LAUNCH_CONTAINER"):
image_tag = os.getenv("FLUENT_IMAGE_TAG", "v25.1.0")
image_name = f"ghcr.io/ansys/pyfluent:{image_tag}"
container_name = uuid.uuid4().hex
Expand Down Expand Up @@ -346,7 +347,7 @@ def generate(version, static_infos: dict, verbose: bool = False):
api_tree["<solver_session>"] = TUIGenerator(
"solver", version, static_infos, verbose
).generate()
if os.getenv("PYFLUENT_HIDE_LOG_SECRETS") != "1":
if not env_var_to_bool("PYFLUENT_HIDE_LOG_SECRETS"):
logger.info(
"XML help is available but not picked for the following %i paths: ",
len(_XML_HELPSTRINGS),
Expand Down
3 changes: 2 additions & 1 deletion src/ansys/fluent/core/examples/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import zipfile

import ansys.fluent.core as pyfluent
from ansys.fluent.core.utils import env_var_to_bool
from ansys.fluent.core.utils.networking import check_url_exists, get_url_content

logger = logging.getLogger("pyfluent.networking")
Expand Down Expand Up @@ -180,7 +181,7 @@ def download_file(
'bracket.iges'
"""
if return_without_path is None:
if os.getenv("PYFLUENT_LAUNCH_CONTAINER") == "1":
if env_var_to_bool("PYFLUENT_LAUNCH_CONTAINER"):
if pyfluent.USE_FILE_TRANSFER_SERVICE:
return_without_path = False
else:
Expand Down
7 changes: 5 additions & 2 deletions src/ansys/fluent/core/fluent_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,13 +746,16 @@ def exit(
env_timeout = os.getenv("PYFLUENT_TIMEOUT_FORCE_EXIT")

if env_timeout:
logger.debug("Found PYFLUENT_TIMEOUT_FORCE_EXIT env var")
logger.debug(
f"Found PYFLUENT_TIMEOUT_FORCE_EXIT env var: '{env_timeout}'"
)
try:
timeout = float(env_timeout)
logger.debug(f"Setting TIMEOUT_FORCE_EXIT to {timeout}")
except ValueError:
logger.debug(
"Off or unrecognized PYFLUENT_TIMEOUT_FORCE_EXIT value, not enabling timeout force exit"
"Off or unrecognized PYFLUENT_TIMEOUT_FORCE_EXIT value (floats and integers are also supported), "
"disabling timeout forced exit."
)

if timeout is None:
Expand Down
8 changes: 6 additions & 2 deletions src/ansys/fluent/core/launcher/container_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,13 +216,17 @@ def __call__(self):

if is_compose():
port, config_dict, container = start_fluent_container(
self._args, self.argvals["container_dict"]
self._args,
self.argvals["container_dict"],
self.argvals["start_timeout"],
)

_, _, password = _get_server_info_from_container(config_dict=config_dict)
else:
port, password, container = start_fluent_container(
self._args, self.argvals["container_dict"]
self._args,
self.argvals["container_dict"],
self.argvals["start_timeout"],
)

fluent_connection = FluentConnection(
Expand Down
2 changes: 1 addition & 1 deletion src/ansys/fluent/core/launcher/error_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class LaunchFluentError(Exception):

def __init__(self, launch_string):
"""__init__ method of LaunchFluentError class."""
details = "\n" + "Fluent Launch string: " + launch_string
details = "\n" + "Fluent Launch command: " + launch_string
super().__init__(details)


Expand Down
58 changes: 50 additions & 8 deletions src/ansys/fluent/core/launcher/fluent_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,15 @@
from pprint import pformat
import tempfile
from typing import Any, List
import warnings

import ansys.fluent.core as pyfluent
from ansys.fluent.core.docker.docker_compose import ComposeBasedLauncher
from ansys.fluent.core.launcher.error_handler import (
LaunchFluentError,
)
from ansys.fluent.core.launcher.launcher_utils import is_compose
from ansys.fluent.core.pyfluent_warnings import PyFluentDeprecationWarning
from ansys.fluent.core.session import _parse_server_info_file
from ansys.fluent.core.utils.deprecate import all_deprecators
from ansys.fluent.core.utils.execution import timeout_loop
Expand Down Expand Up @@ -155,7 +160,7 @@ def configure_container_dict(
args: List[str],
mount_source: str | Path | None = None,
mount_target: str | Path | None = None,
timeout: int = 60,
timeout: int | None = None,
port: int | None = None,
license_server: str | None = None,
container_server_info_file: str | Path | None = None,
Expand All @@ -180,7 +185,9 @@ def configure_container_dict(
Path inside the container where ``mount_source`` will be mounted. This will be the working directory path
visible to the Fluent process running inside the container.
timeout : int, optional
Time limit for the Fluent container to start, in seconds. By default, 30 seconds.
Time limit for the Fluent container to start, in seconds.
.. deprecated:: v0.33.dev0
Use the ``start_timeout`` argument of ``launch_fluent`` instead.
port : int, optional
Port for Fluent container to use.
license_server : str, optional
Expand Down Expand Up @@ -231,6 +238,12 @@ def configure_container_dict(
See also :func:`start_fluent_container`.
"""

if timeout is not None:
warnings.warn(
"configure_container_dict(timeout) is deprecated, use launch_fluent(start_timeout) instead.",
PyFluentDeprecationWarning,
)

logger.debug(f"container_dict before processing:\n{dict_to_str(container_dict)}")

# Starting with 'mount_source' because it is not tied to the 'working_dir'.
Expand Down Expand Up @@ -444,7 +457,7 @@ def configure_container_dict(
container_dict["mount_target"] = mount_target

logger.debug(
f"Fluent container timeout: {timeout}, container_grpc_port: {container_grpc_port}, "
f"Fluent container container_grpc_port: {container_grpc_port}, "
f"host_server_info_file: '{host_server_info_file}', "
f"remove_server_info_file: {remove_server_info_file}"
)
Expand All @@ -460,7 +473,7 @@ def configure_container_dict(


def start_fluent_container(
args: List[str], container_dict: dict | None = None
args: List[str], container_dict: dict | None = None, start_timeout: int = 60
) -> tuple[int, str, Any]:
"""Start a Fluent container.

Expand All @@ -470,6 +483,9 @@ def start_fluent_container(
List of Fluent launch arguments.
container_dict : dict, optional
Dictionary with Docker container configuration.
start_timeout : int, optional
Timeout in seconds for the container to start. If not specified, it defaults to 60
seconds.

Returns
-------
Expand Down Expand Up @@ -503,6 +519,14 @@ def start_fluent_container(
host_server_info_file,
remove_server_info_file,
) = container_vars
launch_string = " ".join(config_dict["command"])

if timeout:
logger.warning(
"launch_fluent(start_timeout) overridden by configure_container_dict(timeout) value."
)
start_timeout = timeout
del timeout

try:
if is_compose():
Expand Down Expand Up @@ -536,18 +560,36 @@ def start_fluent_container(
config_dict.pop("fluent_image"), **config_dict
)

logger.debug(
f"Waiting for Fluent container for up to {start_timeout} seconds..."
)

success = timeout_loop(
lambda: host_server_info_file.stat().st_mtime > last_mtime, timeout
lambda: host_server_info_file.stat().st_mtime > last_mtime,
start_timeout,
)

if not success:
raise TimeoutError(
"Fluent container launch has timed out, stop container manually."
)
try:
container.stop()
except Exception as stop_ex:
logger.error(f"Failed to stop container: {stop_ex}")
raise TimeoutError(
f"Fluent container launch has timed out after {start_timeout} seconds. "
f"Additionally, stopping the container failed: {stop_ex}"
) from stop_ex
else:
raise TimeoutError(
f"Fluent container launch has timed out after {start_timeout} seconds."
" The container was stopped."
)
else:
_, _, password = _parse_server_info_file(str(host_server_info_file))

return port, password, container
except Exception as ex:
logger.error(f"Exception caught - {type(ex).__name__}: {ex}")
raise LaunchFluentError(launch_string) from ex
finally:
if remove_server_info_file and host_server_info_file.exists():
host_server_info_file.unlink()
3 changes: 2 additions & 1 deletion src/ansys/fluent/core/launcher/launch_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from ansys.fluent.core.session_solver import Solver
from ansys.fluent.core.session_solver_aero import SolverAero
from ansys.fluent.core.session_solver_icing import SolverIcing
from ansys.fluent.core.utils import env_var_to_bool
from ansys.fluent.core.utils.fluent_version import FluentVersion
import ansys.platform.instancemanagement as pypim

Expand Down Expand Up @@ -271,7 +272,7 @@ def _get_fluent_launch_mode(start_container, container_dict, scheduler_options):
fluent_launch_mode = LaunchMode.PIM
elif start_container is True or (
start_container is None
and (container_dict or os.getenv("PYFLUENT_LAUNCH_CONTAINER") == "1")
and (container_dict or env_var_to_bool("PYFLUENT_LAUNCH_CONTAINER"))
):
fluent_launch_mode = LaunchMode.CONTAINER
# Currently, only Slurm scheduler is supported and within SlurmLauncher we check the value of the scheduler
Expand Down
6 changes: 5 additions & 1 deletion src/ansys/fluent/core/launcher/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from ansys.fluent.core.session_pure_meshing import PureMeshing
from ansys.fluent.core.session_solver import Solver
from ansys.fluent.core.session_solver_icing import SolverIcing
from ansys.fluent.core.utils import env_var_to_bool
from ansys.fluent.core.utils.deprecate import all_deprecators
from ansys.fluent.core.utils.fluent_version import FluentVersion

Expand Down Expand Up @@ -103,7 +104,7 @@ def _show_gui_to_ui_mode(old_arg_val, **kwds):
return UIMode.NO_GUI
elif container_dict:
return UIMode.NO_GUI
elif os.getenv("PYFLUENT_LAUNCH_CONTAINER") == "1":
elif env_var_to_bool("PYFLUENT_LAUNCH_CONTAINER"):
return UIMode.NO_GUI
else:
return UIMode.GUI
Expand Down Expand Up @@ -322,6 +323,9 @@ def launch_fluent(
if env is None:
env = {}

if start_timeout is None:
start_timeout = int(os.getenv("PYFLUENT_FLUENT_LAUNCH_TIMEOUT", "60"))

def _mode_to_launcher_type(fluent_launch_mode: LaunchMode):
launcher_mode_type = {
LaunchMode.CONTAINER: DockerLauncher,
Expand Down
9 changes: 5 additions & 4 deletions src/ansys/fluent/core/launcher/watchdog.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import time

import ansys.fluent.core as pyfluent
from ansys.fluent.core.utils import env_var_to_bool
from ansys.fluent.core.utils.execution import timeout_loop

logger = pyfluent.logger.get_logger("pyfluent.launcher")
Expand Down Expand Up @@ -76,8 +77,8 @@ def launch(
)
)

env_watchdog_debug = os.getenv("PYFLUENT_WATCHDOG_DEBUG", "off").upper()
if env_watchdog_debug in ("1", "ON"):
debug_watchdog = env_var_to_bool("PYFLUENT_WATCHDOG_DEBUG")
if debug_watchdog:
logger.debug(
f"PYFLUENT_WATCHDOG_DEBUG environment variable found, "
f"enabling debugging for watchdog ID {watchdog_id}..."
Expand Down Expand Up @@ -131,7 +132,7 @@ def launch(
watchdog_id,
]

if env_watchdog_debug in ("1", "ON"):
if debug_watchdog:
logger.debug(f"Starting Watchdog logging to directory {os.getcwd()}")

kwargs = {"env": watchdog_env, "stdin": subprocess.DEVNULL, "close_fds": True}
Expand All @@ -151,7 +152,7 @@ def launch(
if os.name == "posix":
kwargs.update(start_new_session=True)

if env_watchdog_debug in ("1", "ON") and os.name != "nt":
if debug_watchdog and os.name != "nt":
kwargs.update(
stdout=open(f"pyfluent_watchdog_out_{watchdog_id}.log", mode="w"),
stderr=open(f"pyfluent_watchdog_err_{watchdog_id}.log", mode="w"),
Expand Down
3 changes: 2 additions & 1 deletion src/ansys/fluent/core/launcher/watchdog_exec
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ if __name__ == "__main__":
_pid_exists,
get_container,
)
from ansys.fluent.core.utils import env_var_to_bool
from ansys.fluent.core.utils.execution import timeout_exec, timeout_loop

watchdog_id = sys.argv[5]
Expand All @@ -34,7 +35,7 @@ if __name__ == "__main__":

logger = pyfluent.logger.get_logger("pyfluent.watchdog")

if os.getenv("PYFLUENT_WATCHDOG_DEBUG", "OFF").upper() in ("1", "ON"):
if env_var_to_bool("PYFLUENT_WATCHDOG_DEBUG"):
pyfluent.logger.enable(custom_config=log_config)
logger.setLevel("DEBUG")
logger.handlers = pyfluent.logger.get_logger(
Expand Down
2 changes: 2 additions & 0 deletions src/ansys/fluent/core/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
"PYFLUENT_FLUENT_IP",
"PYFLUENT_FLUENT_PORT",
"PYFLUENT_FLUENT_ROOT",
"PYFLUENT_FLUENT_LAUNCH_TIMEOUT",
"PYFLUENT_FLUENT_AUTOMATIC_TRANSCRIPT",
"PYFLUENT_GRPC_LOG_BYTES_LIMIT",
"PYFLUENT_LAUNCH_CONTAINER",
"PYFLUENT_LOGGING",
Expand Down
3 changes: 2 additions & 1 deletion src/ansys/fluent/core/services/interceptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import grpc

from ansys.fluent.core.services.batch_ops import BatchOps
from ansys.fluent.core.utils import env_var_to_bool

network_logger: logging.Logger = logging.getLogger("pyfluent.networking")
log_bytes_limit: int = int(os.getenv("PYFLUENT_GRPC_LOG_BYTES_LIMIT", 1000))
Expand Down Expand Up @@ -75,7 +76,7 @@ def _intercept_call(
if not response.exception():
# call _truncate_grpc_str early to get the size warning even when hiding secrets
response_str = _truncate_grpc_str(response.result())
if os.getenv("PYFLUENT_HIDE_LOG_SECRETS") != "1":
if not env_var_to_bool("PYFLUENT_HIDE_LOG_SECRETS"):
network_logger.debug(f"GRPC_TRACE: response = {response_str}")
return response

Expand Down
Loading
Loading