Skip to content

Commit

Permalink
tbajer-retry-functionality-for-podman
Browse files Browse the repository at this point in the history
  • Loading branch information
Tomas Bajer committed Nov 1, 2023
1 parent 8054cf1 commit 3707a13
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 5 deletions.
48 changes: 43 additions & 5 deletions tmt/steps/provision/podman.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@
import tmt.steps
import tmt.steps.provision
import tmt.utils
from tmt.utils import Command, Path, ShellScript, field
from tmt.utils import Command, Path, ShellScript, field, retry

# Timeout in seconds of waiting for a connection
CONNECTION_TIMEOUT = 60

# Defaults
DEFAULT_IMAGE = "fedora"
DEFAULT_USER = "root"
DEFAULT_PULL_ATTEMPTS = 5
DEFAULT_PULL_INTERVAL = 5


@dataclasses.dataclass
Expand Down Expand Up @@ -44,6 +46,26 @@ class PodmanGuestData(tmt.steps.provision.GuestData):
metavar='NAME',
help='Name or id of an existing container to be used.')

pull_attempts: int = field(
default=DEFAULT_PULL_ATTEMPTS,
option='--pull-attempts',
metavar='COUNT',
help=f"""
How many times to try pulling the image,
{DEFAULT_PULL_ATTEMPTS} attempts by default.
""",
normalize=tmt.utils.normalize_int)

pull_interval: int = field(
default=DEFAULT_PULL_INTERVAL,
option='--pull-interval',
metavar='SECONDS',
help=f"""
How long to wait before a new pull attempt,
{DEFAULT_PULL_INTERVAL} seconds by default.
""",
normalize=tmt.utils.normalize_int)


@dataclasses.dataclass
class ProvisionPodmanData(PodmanGuestData, tmt.steps.provision.ProvisionStepData):
Expand All @@ -60,6 +82,9 @@ class GuestContainer(tmt.Guest):
user: str
force_pull: bool
parent: tmt.steps.Step
pull_attempts: int
pull_interval: int
logger: tmt.log.Logger

@property
def is_ready(self) -> bool:
Expand All @@ -79,6 +104,15 @@ def wake(self) -> None:
self.debug(
f"Waking up container '{self.container}'.", level=2, shift=0)

def pull_image(self) -> None:
    """
    Run the quiet ``pull`` command for the guest image through ``self.podman``.

    The pull is unconditional; deciding whether it is needed at all
    is left to the caller.
    """
    image = self.image
    assert image is not None  # narrow type

    pull_command = Command('pull', '-q', image)
    self.podman(pull_command, message=f"Pull image '{image}'.")

def start(self) -> None:
""" Start provisioned guest """
if self.is_dry_run:
Expand All @@ -95,11 +129,15 @@ def start(self) -> None:
except tmt.utils.RunError:
needs_pull = True

# Pull image if not available or pull forced
# Retry pulling the image in case of network issues
# Temporary solution until configurable in podman itself
if needs_pull or self.force_pull:
self.podman(
Command('pull', '-q', self.image),
message=f"Pull image '{self.image}'."
retry(
self.pull_image,
self.pull_attempts,
self.pull_interval,
f"Pulling '{self.image}' image",
self._logger
)

# Mount the whole plan directory in the container
Expand Down
39 changes: 39 additions & 0 deletions tmt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1695,6 +1695,13 @@ def __init__(
self.check_success = check_success


class RetryError(GeneralError):
    """ All retries of an action were unsuccessful """

    def __init__(self, label: str, causes: List[Exception]) -> None:
        # ``label`` names the retried action; the collected exceptions
        # are handed to the parent class as the second positional argument.
        message = f"Retries of {label} unsuccessful."
        super().__init__(message, causes)


# Step exceptions


Expand Down Expand Up @@ -6183,3 +6190,35 @@ def __exit__(self, *args: Any) -> None:
@property
def duration(self) -> datetime.timedelta:
    """ Difference between ``end_time`` and ``start_time`` """
    ended, started = self.end_time, self.start_time
    return ended - started


def retry(
        func: Callable[..., T],
        attempts: int,
        interval: int,
        label: str,
        logger: tmt.log.Logger,
        *args: Any,
        **kwargs: Any
        ) -> T:
    """
    Call ``func`` repeatedly until it succeeds or attempts run out.

    :param func: callable to invoke; it is given ``*args`` and ``**kwargs``.
    :param attempts: maximum number of times ``func`` is called.
    :param interval: seconds to wait between a failed call and the next one.
    :param label: description of the retried action, used in log
        messages and in the final error.
    :param logger: logger used to report each failed attempt.
    :returns: the value returned by the first successful call of ``func``.
    :raises RetryError: when every attempt raised an exception; all
        collected exceptions are passed on as its causes.
    """
    exceptions: List[Exception] = []
    for i in range(attempts):
        try:
            return func(*args, **kwargs)
        except Exception as exc:
            exceptions.append(exc)

            # The attempt that just failed is already used up, so it must
            # not be counted among the retries that are still available.
            remaining = attempts - i - 1
            if remaining:
                outlook = (
                    f"{remaining} retries left, "
                    f"trying again in {interval:.2f} seconds.")
            else:
                outlook = "no retries left."

            logger.debug('retry', f"{label} failed, {outlook}")
            logger.fail(str(exc))

            # Waiting after the final attempt would only delay the error.
            if remaining:
                time.sleep(interval)

    raise RetryError(label, causes=exceptions)

0 comments on commit 3707a13

Please sign in to comment.