Skip to content

Commit

Permalink
fix(disrupt_mgr): skip take_snapshot errors per issue
Browse files Browse the repository at this point in the history
The change adds a decorator helper that uses the SkipPerIssues mechanism and applies (enters)
the provided contexts to a function if the issue in question is open.

Also, the new decorator is used to decorate the mgmt_backup Nemesis disruption to skip take_snapshot
related errors until issue scylladb/scylla-manager#3389
is resolved.
  • Loading branch information
dimakr authored and fruch committed Sep 11, 2024
1 parent 9950447 commit bf7d027
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
5 changes: 5 additions & 0 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@
decorate_with_context,
ignore_reactor_stall_errors,
ignore_disk_quota_exceeded_errors,
decorate_with_context_if_issues_open,
ignore_take_snapshot_failing,
)
from sdcm.sct_events.health import DataValidatorEvent
from sdcm.sct_events.loaders import CassandraStressLogEvent, ScyllaBenchEvent
Expand Down Expand Up @@ -3004,6 +3006,9 @@ def _delete_existing_backups(self, mgr_cluster):
self.log.warning("Deleted the following backup tasks before the nemesis starts: %s",
", ".join(deleted_tasks))

@decorate_with_context_if_issues_open(
ignore_take_snapshot_failing,
issue_refs=['https://github.com/scylladb/scylla-manager/issues/3389'])
def _mgmt_backup(self, backup_specific_tables):
if not self.cluster.params.get('use_mgmt') and not self.cluster.params.get('use_cloud_manager'):
raise UnsupportedNemesis('Scylla-manager configuration is not defined!')
Expand Down
35 changes: 35 additions & 0 deletions sdcm/sct_events/group_common_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@
from functools import wraps
from typing import ContextManager, Callable, Sequence

from sdcm.cluster import TestConfig
from sdcm.sct_events import Severity
from sdcm.sct_events.filters import DbEventsFilter, EventsSeverityChangerFilter, EventsFilter
from sdcm.sct_events.loaders import YcsbStressEvent
from sdcm.sct_events.database import DatabaseLogEvent
from sdcm.sct_events.monitors import PrometheusAlertManagerEvent
from sdcm.utils.issues import SkipPerIssues


@contextmanager
Expand Down Expand Up @@ -347,6 +349,22 @@ def ignore_raft_transport_failing():
yield


@contextmanager
def ignore_take_snapshot_failing():
    """
    Context manager that, while active, keeps two EventsSeverityChangerFilter
    instances entered: one per known `take_snapshot failed` log pattern.

    Each filter is created for DatabaseLogEvent with new_severity=Severity.WARNING
    and extra_time_to_expiration=60. Filters are entered in the order the
    patterns are listed and are released together when the context exits.
    """
    snapshot_failure_patterns = (
        r".*api - take_snapshot failed: std::filesystem::__cxx11::filesystem_error.*No such file or directory",
        r".*api - take_snapshot failed: std::runtime_error \(Keyspace.*snapshot.*already exists",
    )
    with ExitStack() as stack:
        for pattern in snapshot_failure_patterns:
            severity_filter = EventsSeverityChangerFilter(
                new_severity=Severity.WARNING,
                event_class=DatabaseLogEvent,
                regex=pattern,
                extra_time_to_expiration=60,
            )
            stack.enter_context(severity_filter)
        yield


def decorate_with_context(context_list: list[Callable | ContextManager] | Callable | ContextManager):
"""
helper to decorate a function to run with a list of callables that return context managers
Expand All @@ -373,3 +391,20 @@ def inner_func(*args, **kwargs):
return func(*args, **kwargs)
return inner_func
return inner_decorator


def decorate_with_context_if_issues_open(
        contexts: list[Callable | ContextManager] | Callable | ContextManager, issue_refs: list[str]):
    """
    Build a decorator that conditionally runs a function under the given contexts.

    On every call of the decorated function, `SkipPerIssues(issue_refs, params)` is
    evaluated, where `params` comes from the current tester object. If the result is
    truthy, the call goes through `decorate_with_context(contexts)`; otherwise the
    function is called directly.

    :param contexts: context manager(s), or callable(s) returning them, passed
        unchanged to `decorate_with_context`
    :param issue_refs: issue references handed to `SkipPerIssues`
    :return: a decorator preserving the wrapped function's metadata
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # The check is done per call, not at decoration time, so it reflects
            # the test parameters available when the function actually runs.
            test_params = TestConfig().tester_obj().params
            target = func
            if SkipPerIssues(issue_refs, test_params):
                target = decorate_with_context(contexts)(func)
            return target(*args, **kwargs)
        return wrapper
    return decorator

0 comments on commit bf7d027

Please sign in to comment.