Skip to content

Commit

Permalink
fix(nemesis): make sure set_target_node always set `current_disrupt…
Browse files Browse the repository at this point in the history
…ion`

before there was two seprate calls to `set_target_node` and to
`set_current_disruption` that could end up with `set_target_node`
setting None to the `target_node.running_nemesis`

this fix move `set_current_disruption` into `set_target_node`
to avoid this problem, and also introduce a new paramter to
`set_target_node` so any user of it can set what's the data
that would be saved on the target node (it's only for debugging)

Fixes: #8198
  • Loading branch information
fruch committed Aug 27, 2024
1 parent 07d228c commit a12ee91
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ def _get_target_nodes(

def set_target_node(self, dc_idx: Optional[int] = None, rack: Optional[int] = None,
is_seed: Union[bool, DefaultValue, None] = DefaultValue,
allow_only_last_node_in_rack: bool = False):
allow_only_last_node_in_rack: bool = False, current_disruption=None):
"""Set a Scylla node as target node.
if is_seed is None - it will ignore seed status of the nodes
Expand All @@ -405,7 +405,13 @@ def set_target_node(self, dc_idx: Optional[int] = None, rack: Optional[int] = No
else:
self.target_node = random.choice(nodes)

self.target_node.running_nemesis = self.current_disruption
if current_disruption:
self.target_node.running_nemesis = current_disruption
self.set_current_disruption(current_disruption)
elif self.current_disruption:
self.target_node.running_nemesis = self.current_disruption
else:
raise ValueError("current_disruption is not set")
self.log.info('Current Target: %s with running nemesis: %s',
self.target_node, self.target_node.running_nemesis)

Expand Down Expand Up @@ -664,7 +670,6 @@ def _get_random_non_system_ks_cf(self, filter_empty_tables: bool = True) -> tupl
return None, None

def _prepare_start_stop_compaction(self) -> StartStopCompactionArgs:
self.set_target_node()
ks, cf = self._get_random_non_system_ks_cf()

if not ks or not cf:
Expand Down Expand Up @@ -1303,6 +1308,7 @@ def _terminate_cluster_node(self, node):

def _nodetool_decommission(self, add_node=True):
if self._is_it_on_kubernetes():
self.unset_current_running_nemesis(self.target_node)
self.set_target_node(allow_only_last_node_in_rack=True)
target_is_seed = self.target_node.is_seed
self.cluster.decommission(self.target_node)
Expand Down Expand Up @@ -5178,7 +5184,7 @@ def data_validation_prints(args):
args[0].log.debug(f'Data validator error: {err}')

@wraps(method)
def wrapper(*args, **kwargs): # pylint: disable=too-many-statements
def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PLR0914
# pylint: disable=too-many-locals
# pylint: disable=too-many-branches
method_name = method.__name__
Expand All @@ -5193,9 +5199,8 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements
# NOTE: exclusive nemesis will wait before the end of all other ones
time.sleep(10)

args[0].current_disruption = "".join(p.capitalize() for p in method_name.replace("disrupt_", "").split("_"))
args[0].set_current_disruption(f"{args[0].current_disruption}")
args[0].set_target_node()
current_disruption = "".join(p.capitalize() for p in method_name.replace("disrupt_", "").split("_"))
args[0].set_target_node(current_disruption=current_disruption)

args[0].cluster.check_cluster_health()
num_nodes_before = len(args[0].cluster.nodes)
Expand All @@ -5208,7 +5213,6 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements
status = True
# pylint: disable=protected-access

args[0].set_current_running_nemesis(node=args[0].target_node)
log_info = {
'operation': args[0].current_disruption,
'start': int(start_time),
Expand Down

0 comments on commit a12ee91

Please sign in to comment.