From 3707a1390752363be289e698e01f48dfc6929e6c Mon Sep 17 00:00:00 2001
From: Tomas Bajer
Date: Sun, 29 Oct 2023 21:10:40 +0100
Subject: [PATCH] tbajer-retry-functionality-for-podman

---
 tmt/steps/provision/podman.py | 48 ++++++++++++++++++++++++++++++----
 tmt/utils.py                  | 49 +++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/tmt/steps/provision/podman.py b/tmt/steps/provision/podman.py
index 0ff59af0cc..ade91e785e 100644
--- a/tmt/steps/provision/podman.py
+++ b/tmt/steps/provision/podman.py
@@ -9,7 +9,7 @@
 import tmt.steps
 import tmt.steps.provision
 import tmt.utils
-from tmt.utils import Command, Path, ShellScript, field
+from tmt.utils import Command, Path, ShellScript, field, retry
 
 # Timeout in seconds of waiting for a connection
 CONNECTION_TIMEOUT = 60
@@ -17,6 +17,8 @@
 # Defaults
 DEFAULT_IMAGE = "fedora"
 DEFAULT_USER = "root"
+DEFAULT_PULL_ATTEMPTS = 5
+DEFAULT_PULL_INTERVAL = 5
 
 
 @dataclasses.dataclass
@@ -44,6 +46,26 @@ class PodmanGuestData(tmt.steps.provision.GuestData):
         metavar='NAME',
         help='Name or id of an existing container to be used.')
 
+    pull_attempts: int = field(
+        default=DEFAULT_PULL_ATTEMPTS,
+        option='--pull-attempts',
+        metavar='COUNT',
+        help=f"""
+             How many times to try pulling the image,
+             {DEFAULT_PULL_ATTEMPTS} attempts by default.
+             """,
+        normalize=tmt.utils.normalize_int)
+
+    pull_interval: int = field(
+        default=DEFAULT_PULL_INTERVAL,
+        option='--pull-interval',
+        metavar='SECONDS',
+        help=f"""
+             How long to wait before a new pull attempt,
+             {DEFAULT_PULL_INTERVAL} seconds by default.
+             """,
+        normalize=tmt.utils.normalize_int)
+
 
 @dataclasses.dataclass
 class ProvisionPodmanData(PodmanGuestData, tmt.steps.provision.ProvisionStepData):
@@ -60,6 +82,9 @@ class GuestContainer(tmt.Guest):
     user: str
     force_pull: bool
     parent: tmt.steps.Step
+    pull_attempts: int
+    pull_interval: int
+    logger: tmt.log.Logger
 
     @property
     def is_ready(self) -> bool:
@@ -79,6 +104,15 @@ def wake(self) -> None:
         self.debug(
             f"Waking up container '{self.container}'.", level=2, shift=0)
 
+    def pull_image(self) -> None:
+        """ Pull the image unconditionally, callers decide whether it is needed """
+        assert self.image is not None  # narrow type
+
+        self.podman(
+            Command('pull', '-q', self.image),
+            message=f"Pull image '{self.image}'."
+            )
+
     def start(self) -> None:
         """ Start provisioned guest """
         if self.is_dry_run:
@@ -95,11 +129,15 @@ def start(self) -> None:
         except tmt.utils.RunError:
             needs_pull = True
 
-        # Pull image if not available or pull forced
+        # Retry pulling the image in case of network issues
+        # Temporary solution until configurable in podman itself
         if needs_pull or self.force_pull:
-            self.podman(
-                Command('pull', '-q', self.image),
-                message=f"Pull image '{self.image}'."
+            retry(
+                self.pull_image,
+                self.pull_attempts,
+                self.pull_interval,
+                f"Pulling '{self.image}' image",
+                self._logger
                 )
 
         # Mount the whole plan directory in the container
diff --git a/tmt/utils.py b/tmt/utils.py
index 8bacd2f805..795d57d8d3 100644
--- a/tmt/utils.py
+++ b/tmt/utils.py
@@ -1695,6 +1695,13 @@ def __init__(
         self.check_success = check_success
 
 
+class RetryError(GeneralError):
+    """ Raised by retry() when none of the attempts succeeded """
+
+    def __init__(self, label: str, causes: List[Exception]) -> None:
+        super().__init__(f"Retries of {label} unsuccessful.", causes)
+
+
 # Step exceptions
 
 
@@ -6183,3 +6190,45 @@ def __exit__(self, *args: Any) -> None:
     @property
     def duration(self) -> datetime.timedelta:
         return self.end_time - self.start_time
+
+
+def retry(
+        func: Callable[..., T],
+        attempts: int,
+        interval: int,
+        label: str,
+        logger: tmt.log.Logger,
+        *args: Any,
+        **kwargs: Any
+        ) -> T:
+    """ Call a function repeatedly until it succeeds or attempts run out.
+
+    :param func: function to call; ``args`` and ``kwargs`` are passed to it.
+    :param attempts: maximum number of calls; when lower than 1, ``func``
+        is never called and ``RetryError`` is raised right away.
+    :param interval: seconds to wait between two consecutive attempts.
+    :param label: description of the action, used in log messages and
+        in the final error message.
+    :param logger: logger used to report failed attempts.
+    :returns: value returned by the first successful call of ``func``.
+    :raises RetryError: when no attempt succeeded, with the exceptions
+        raised by the individual attempts passed along as its causes.
+    """
+    exceptions: List[Exception] = []
+    for i in range(attempts):
+        try:
+            return func(*args, **kwargs)
+        except Exception as exc:
+            exceptions.append(exc)
+            logger.fail(str(exc))
+            remaining = attempts - i - 1
+            if remaining == 0:
+                # Last attempt failed: do not announce, or wait for,
+                # a retry that is never going to happen.
+                break
+            logger.debug(
+                'retry',
+                f"{label} failed, {remaining} retries left, "
+                f"trying again in {interval:.2f} seconds.")
+            time.sleep(interval)
+    raise RetryError(label, causes=exceptions)