Skip to content

Commit

Permalink
try retries on the signal_sysevent
Browse files: browse the repository at this point in the history
  • Loading branch information
msullivan committed Mar 1, 2024
1 parent 7e0999d commit 0750057
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 6 deletions.
33 changes: 29 additions & 4 deletions edb/common/retryloop.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,18 @@ def __init__(
self,
*,
backoff: Callable[[int], float] = const_backoff(0.5),
timeout: float,
timeout: float | None = None,
iterations: int | None = None,
ignore: Type[Exception] | Tuple[Type[Exception], ...] | None = None,
wait_for: Type[Exception] | Tuple[Type[Exception], ...] | None = None,
retry_cb: Callable[[Optional[BaseException]], None] | None = None,
) -> None:
assert timeout is not None or iterations is not None

self._iteration = 0
self._backoff = backoff
self._timeout = timeout
self._max_iterations = iterations
self._ignore = ignore
self._wait_for = wait_for
self._started_at = 0.0
Expand Down Expand Up @@ -113,10 +117,19 @@ async def __aexit__(
# Propagate, it's not the error we expected.
return False

if elapsed > self._loop._timeout:
if (
self._loop._timeout is not None
and elapsed > self._loop._timeout
):
# Propagate -- we've run it enough times.
return False

if (
self._loop._max_iterations is not None
and self._loop._iteration >= self._loop._max_iterations
):
return False

if self._loop._retry_cb is not None:
self._loop._retry_cb(e)

Expand All @@ -137,10 +150,22 @@ async def __aexit__(
# Propagate, it's not the error we expected.
return False

if elapsed > self._loop._timeout:
if (
self._loop._timeout is not None
and elapsed > self._loop._timeout
):
raise TimeoutError(
f'exception matching {self._loop._wait_for!r} '
f'has not happened in {self._loop._timeout} seconds')

if (
self._loop._max_iterations is not None
and self._loop._iteration >= self._loop._max_iterations
):
raise TimeoutError(
f'exception matching {self._loop._wait_for!r} '
f'has not happen in {self._loop._timeout} seconds')
f'has not happened in {self._loop._max_iterations} '
f'iterations')

# Ignore the exception until next run.
return True
19 changes: 17 additions & 2 deletions edb/server/tenant.py
Original file line number Diff line number Diff line change
Expand Up @@ -1308,9 +1308,24 @@ async def signal_sysevent(self, event: str, **kwargs) -> None:
# in flight.
return

rloop = retryloop.RetryLoop(
iterations=3,
ignore=pgcon.BackendError,
backoff=retryloop.const_backoff(0.0),
)
async with self.use_sys_pgcon() as con:
await con.signal_sysevent(event, **kwargs)
except Exception:
async for iteration in rloop:
async with iteration:
try:
await con.signal_sysevent(event, **kwargs)
except Exception as ex:
print("=== YES IT IS FUCKED", ex)
raise

except Exception as ex:
print("SYSEVENT ERROR")
debug.dump(ex)
sys.stdout.flush()
metrics.background_errors.inc(
1.0, self._instance_name, "signal_sysevent"
)
Expand Down

0 comments on commit 0750057

Please sign in to comment.