From 1a3ce1bd2caa09355167b248409d108f5ded28e5 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 11 Oct 2024 14:24:57 +0800 Subject: [PATCH 01/36] Dev: utils: Introduced `detect_duplicate_device_path` function in utils - A new function `detect_duplicate_device_path` has been added to `utils.py`. - This function checks for duplicated device paths in a given list of devices, raising a ValueError if duplicates are found. --- crmsh/utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/crmsh/utils.py b/crmsh/utils.py index 85601ee379..5b0b193404 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -28,6 +28,7 @@ import json import socket from pathlib import Path +from collections import defaultdict from contextlib import contextmanager, closing from stat import S_ISBLK from lxml import etree @@ -2499,6 +2500,19 @@ def is_block_device(dev): return rc +def detect_duplicate_device_path(device_list: typing.List[str]): + """ + Resolve device path and check if there are duplicated device path + """ + path_dict = defaultdict(list) + for dev in device_list: + resolved_path = Path(dev).resolve() + path_dict[resolved_path].append(dev) + for path, dev_list in path_dict.items(): + if len(dev_list) > 1: + raise ValueError(f"Duplicated device path detected: {','.join(dev_list)}. They are all pointing to {path}") + + def has_stonith_running(): """ Check if any stonith device registered From 4856ee7e2dc92139ff67d03d5d0f2d51c633adc1 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 14 Jun 2024 14:40:31 +0800 Subject: [PATCH 02/36] Dev: ui_sbd: Add new 'crm sbd' sublevel (jsc#PED-8256) ** Motivation The main configurations for sbd use cases are scattered among sysconfig, on-disk meta data, CIB, and could even be related to other OS components, e.g. coredump, SCSI, multipath. It's desirable to reduce the management complexity among them and to streamline the workflow for the main use case scenarios. ** Changes include **** Disk-based SBD scenarios 1. 
Show usage when syntax error 2. Completion 3. Display SBD related configuration (UC4 in PED-8256) 4. Change the on-disk meta data of the existing sbd disks (UC2.1 in PED-8256) 5. Add a sbd disk with the existing sbd configuration (UC2.2 in PED-8256) 6. Remove a sbd disk (UC2.3 in PED-8256) 7. Remove sbd from cluster 8. Replace the storage for a sbd disk (UC2.4 in PED-8256) 9. Display status (focusing on the runtime information only) (UC5 in PED-8256) **** Disk-less SBD scenarios 1. Show usage when syntax error (diskless) 2. Completion (diskless) 3. Display SBD related configuration (UC4 in PED-8256, diskless) 4. Manipulate the basic diskless sbd configuration (UC3.1 in PED-8256) --- crmsh/bootstrap.py | 30 +- crmsh/constants.py | 3 + crmsh/qdevice.py | 8 +- crmsh/sbd.py | 791 ++++++++++++++++++++++++--------------------- crmsh/ui_root.py | 5 + crmsh/ui_sbd.py | 481 +++++++++++++++++++++++++++ crmsh/utils.py | 25 +- crmsh/watchdog.py | 22 +- 8 files changed, 976 insertions(+), 389 deletions(-) create mode 100644 crmsh/ui_sbd.py diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 8d24e568e7..d8101688f7 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -211,12 +211,15 @@ def _validate_sbd_option(self): """ Validate sbd options """ + from .sbd import SBDUtils if self.sbd_devices and self.diskless_sbd: utils.fatal("Can't use -s and -S options together") + if self.sbd_devices: + SBDUtils.verify_sbd_device(self.sbd_devices) if self.stage == "sbd": if not self.sbd_devices and not self.diskless_sbd and self.yes_to_all: utils.fatal("Stage sbd should specify sbd device by -s or diskless sbd by -S option") - if ServiceManager().service_is_active("sbd.service") and not config.core.force: + if ServiceManager().service_is_active(constants.SBD_SERVICE) and not config.core.force: utils.fatal("Can't configure stage sbd: sbd.service already running! 
Please use crm option '-F' if need to redeploy") if self.cluster_is_running: utils.check_all_nodes_reachable() @@ -292,7 +295,7 @@ def validate_option(self): def init_sbd_manager(self): from .sbd import SBDManager - self.sbd_manager = SBDManager(self) + self.sbd_manager = SBDManager(bootstrap_context=self) def detect_platform(self): """ @@ -394,7 +397,7 @@ def prompt_for_string(msg, match=None, default='', valid_func=None, prev_value=[ def confirm(msg): - if _context.yes_to_all: + if config.core.force or (_context and _context.yes_to_all): return True disable_completion() rc = logger_utils.confirm(msg) @@ -404,12 +407,12 @@ def confirm(msg): def disable_completion(): - if _context.ui_context: + if _context and _context.ui_context: _context.ui_context.disable_completion() def enable_completion(): - if _context.ui_context: + if _context and _context.ui_context: _context.ui_context.setup_readline() @@ -767,7 +770,7 @@ def start_pacemaker(node_list=[], enable_flag=False): # not _context means not in init or join process if not _context and \ utils.package_is_installed("sbd") and \ - ServiceManager().service_is_enabled("sbd.service") and \ + ServiceManager().service_is_enabled(constants.SBD_SERVICE) and \ SBDTimeout.is_sbd_delay_start(): target_dir = "/run/systemd/system/sbd.service.d/" cmd1 = "mkdir -p {}".format(target_dir) @@ -1394,7 +1397,7 @@ def init_sbd(): import crmsh.sbd if _context.stage == "sbd": crmsh.sbd.clean_up_existing_sbd_resource() - _context.sbd_manager.sbd_init() + _context.sbd_manager.init_and_deploy_sbd() def init_cluster(): @@ -1419,7 +1422,9 @@ def init_cluster(): rsc_defaults rsc-options: resource-stickiness=1 migration-threshold=3 """) - _context.sbd_manager.configure_sbd_resource_and_properties() + if ServiceManager().service_is_enabled(constants.SBD_SERVICE): + _context.sbd_manager.configure_sbd() + def init_admin(): @@ -2669,7 +2674,7 @@ def adjust_stonith_timeout(): """ Adjust stonith-timeout for sbd and other scenarios """ - if 
ServiceManager().service_is_active("sbd.service"): + if ServiceManager().service_is_active(constants.SBD_SERVICE): from .sbd import SBDTimeout SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() else: @@ -2733,7 +2738,12 @@ def sync_file(path): """ Sync files between cluster nodes """ - if _context.skip_csync2: + if _context: + skip_csync2 = _context.skip_csync2 + else: + skip_csync2 = not ServiceManager().service_is_active(CSYNC2_SERVICE) + + if skip_csync2: utils.cluster_copy_file(path, nodes=_context.node_list_in_cluster, output=False) else: csync2_update(path) diff --git a/crmsh/constants.py b/crmsh/constants.py index 4bdb6704fa..89686aa83d 100644 --- a/crmsh/constants.py +++ b/crmsh/constants.py @@ -450,4 +450,7 @@ HIDDEN_COMMANDS = {'ms'} NO_SSH_ERROR_MSG = "ssh-related operations are disabled. crmsh works in local mode." + +PCMK_SERVICE = "pacemaker.service" +SBD_SERVICE = "sbd.service" # vim:ts=4:sw=4:et: diff --git a/crmsh/qdevice.py b/crmsh/qdevice.py index fd1dac0028..1f63022214 100644 --- a/crmsh/qdevice.py +++ b/crmsh/qdevice.py @@ -612,15 +612,15 @@ def adjust_sbd_watchdog_timeout_with_qdevice(self): """ Adjust SBD_WATCHDOG_TIMEOUT when configuring qdevice and diskless SBD """ - from .sbd import SBDManager, SBDTimeout + from .sbd import SBDManager, SBDTimeout, SBDUtils utils.check_all_nodes_reachable() - self.using_diskless_sbd = SBDManager.is_using_diskless_sbd() + self.using_diskless_sbd = SBDUtils.is_using_diskless_sbd() # add qdevice after diskless sbd started if self.using_diskless_sbd: - res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") + res = SBDUtils.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") if not res or int(res) < SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE: sbd_watchdog_timeout_qdevice = SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE - SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)}) + SBDManager.update_sbd_configuration({"SBD_WATCHDOG_TIMEOUT": 
str(sbd_watchdog_timeout_qdevice)}) utils.set_property("stonith-timeout", SBDTimeout.get_stonith_timeout()) @qnetd_lock_for_same_cluster_name diff --git a/crmsh/sbd.py b/crmsh/sbd.py index c005126c44..7e5bb7f626 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -1,5 +1,6 @@ import os import re +import typing from . import utils, sh from . import bootstrap from .bootstrap import SYSCONFIG_SBD, SBD_SYSTEMD_DELAY_START_DIR @@ -7,17 +8,131 @@ from . import constants from . import corosync from . import xmlutil +from . import watchdog from .service_manager import ServiceManager from .sh import ShellUtils logger = log.setup_logger(__name__) -logger_utils = log.LoggerUtils(logger) + + +class SBDUtils: + ''' + Consolidate sbd related utility methods + ''' + @staticmethod + def get_sbd_device_metadata(dev, timeout_only=False, remote=None) -> dict: + ''' + Extract metadata from sbd device header + ''' + sbd_info = {} + try: + out = sh.cluster_shell().get_stdout_or_raise_error(f"sbd -d {dev} dump", remote) + except Exception: + return sbd_info + + pattern = r"UUID\s+:\s+(\S+)|Timeout\s+\((\w+)\)\s+:\s+(\d+)" + matches = re.findall(pattern, out) + for uuid, timeout_type, timeout_value in matches: + if uuid and not timeout_only: + sbd_info["uuid"] = uuid + elif timeout_type and timeout_value: + sbd_info[timeout_type] = int(timeout_value) + return sbd_info + + @staticmethod + def get_device_uuid(dev, node=None): + ''' + Get UUID for specific device and node + ''' + res = SBDUtils.get_sbd_device_metadata(dev, remote=node).get("uuid") + if not res: + raise ValueError(f"Cannot find sbd device UUID for {dev}") + return res + + @staticmethod + def compare_device_uuid(dev, node_list): + ''' + Compare local sbd device UUID with other node's sbd device UUID + ''' + if not node_list: + return + local_uuid = SBDUtils.get_device_uuid(dev) + for node in node_list: + remote_uuid = SBDUtils.get_device_uuid(dev, node) + if local_uuid != remote_uuid: + raise ValueError(f"Device {dev} doesn't have 
the same UUID with {node}") + + @staticmethod + def verify_sbd_device(dev_list, compare_node_list=[]): + if len(dev_list) > SBDManager.SBD_DEVICE_MAX: + raise ValueError(f"Maximum number of SBD device is {SBDManager.SBD_DEVICE_MAX}") + for dev in dev_list: + if not utils.is_block_device(dev): + raise ValueError(f"{dev} doesn't look like a block device") + SBDUtils.compare_device_uuid(dev, compare_node_list) + utils.detect_duplicate_device_path(dev_list) + + @staticmethod + def get_sbd_value_from_config(key): + ''' + Get value from /etc/sysconfig/sbd + ''' + return utils.parse_sysconfig(SYSCONFIG_SBD).get(key) + + @staticmethod + def get_sbd_device_from_config(): + ''' + Get sbd device list from config + ''' + res = SBDUtils.get_sbd_value_from_config("SBD_DEVICE") + return res.split(';') if res else [] + + @staticmethod + def is_using_diskless_sbd(): + ''' + Check if using diskless SBD + ''' + dev_list = SBDUtils.get_sbd_device_from_config() + return not dev_list and ServiceManager().service_is_active(constants.SBD_SERVICE) + + @staticmethod + def has_sbd_device_already_initialized(dev) -> bool: + ''' + Check if sbd device already initialized + ''' + cmd = "sbd -d {} dump".format(dev) + rc, _, _ = ShellUtils().get_stdout_stderr(cmd) + return rc == 0 + + @staticmethod + def no_overwrite_device_check(dev) -> bool: + ''' + Check if device already initialized and ask if need to overwrite + ''' + initialized = SBDUtils.has_sbd_device_already_initialized(dev) + return initialized and \ + not bootstrap.confirm(f"{dev} has already been initialized by SBD, do you want to overwrite it?") + + @staticmethod + def check_devices_metadata_consistent(dev_list) -> bool: + ''' + Check if all devices have the same metadata + ''' + consistent = True + if len(dev_list) < 2: + return consistent + first_dev_metadata = SBDUtils.get_sbd_device_metadata(dev_list[0], timeout_only=True) + for dev in dev_list[1:]: + if SBDUtils.get_sbd_device_metadata(dev, timeout_only=True) != 
first_dev_metadata: + logger.warning(f"Device {dev} doesn't have the same metadata as {dev_list[0]}") + consistent = False + return consistent class SBDTimeout(object): - """ + ''' Consolidate sbd related timeout methods and constants - """ + ''' STONITH_WATCHDOG_TIMEOUT_DEFAULT = -1 SBD_WATCHDOG_TIMEOUT_DEFAULT = 5 SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 = 15 @@ -25,15 +140,14 @@ class SBDTimeout(object): QDEVICE_SYNC_TIMEOUT_MARGIN = 5 def __init__(self, context=None): - """ + ''' Init function - """ + ''' self.context = context self.sbd_msgwait = None self.stonith_timeout = None self.sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT self.stonith_watchdog_timeout = self.STONITH_WATCHDOG_TIMEOUT_DEFAULT - self.sbd_delay_start = None self.two_node_without_qdevice = False def initialize_timeout(self): @@ -44,10 +158,10 @@ def initialize_timeout(self): self._set_sbd_msgwait() def _set_sbd_watchdog_timeout(self): - """ + ''' Set sbd_watchdog_timeout from profiles.yml if exists Then adjust it if in s390 environment - """ + ''' if "sbd.watchdog_timeout" in self.context.profiles_dict: self.sbd_watchdog_timeout = int(self.context.profiles_dict["sbd.watchdog_timeout"]) if self.context.is_s390 and self.sbd_watchdog_timeout < self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390: @@ -55,10 +169,10 @@ def _set_sbd_watchdog_timeout(self): self.sbd_watchdog_timeout = self.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 def _set_sbd_msgwait(self): - """ + ''' Set sbd msgwait from profiles.yml if exists Default is 2 * sbd_watchdog_timeout - """ + ''' sbd_msgwait_default = 2 * self.sbd_watchdog_timeout sbd_msgwait = sbd_msgwait_default if "sbd.msgwait" in self.context.profiles_dict: @@ -68,10 +182,25 @@ def _set_sbd_msgwait(self): sbd_msgwait = sbd_msgwait_default self.sbd_msgwait = sbd_msgwait + @classmethod + def get_advised_sbd_timeout(cls, diskless=False) -> typing.Tuple[int, int]: + ''' + Get suitable sbd_watchdog_timeout and sbd_msgwait + ''' + ctx = bootstrap.Context() + ctx.diskless_sbd = diskless + 
ctx.load_profiles() + time_inst = cls(ctx) + time_inst.initialize_timeout() + + sbd_watchdog_timeout = time_inst.sbd_watchdog_timeout + sbd_msgwait = None if diskless else time_inst.sbd_msgwait + return sbd_watchdog_timeout, sbd_msgwait + def _adjust_sbd_watchdog_timeout_with_diskless_and_qdevice(self): - """ + ''' When using diskless SBD with Qdevice, adjust value of sbd_watchdog_timeout - """ + ''' # add sbd after qdevice started if corosync.is_qdevice_configured() and ServiceManager().service_is_active("corosync-qdevice.service"): qdevice_sync_timeout = utils.get_qdevice_sync_timeout() @@ -87,44 +216,42 @@ def _adjust_sbd_watchdog_timeout_with_diskless_and_qdevice(self): @staticmethod def get_sbd_msgwait(dev): - """ + ''' Get msgwait for sbd device - """ - out = sh.cluster_shell().get_stdout_or_raise_error("sbd -d {} dump".format(dev)) - # Format like "Timeout (msgwait) : 30" - res = re.search(r"\(msgwait\)\s+:\s+(\d+)", out) + ''' + res = SBDUtils.get_sbd_device_metadata(dev).get("msgwait") if not res: - raise ValueError("Cannot get sbd msgwait for {}".format(dev)) - return int(res.group(1)) + raise ValueError(f"Cannot get sbd msgwait for {dev}") + return res @staticmethod def get_sbd_watchdog_timeout(): - """ + ''' Get SBD_WATCHDOG_TIMEOUT from /etc/sysconfig/sbd - """ - res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") + ''' + res = SBDUtils.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT") if not res: raise ValueError("Cannot get the value of SBD_WATCHDOG_TIMEOUT") return int(res) @staticmethod def get_stonith_watchdog_timeout(): - """ + ''' For non-bootstrap case, get stonith-watchdog-timeout value from cluster property - """ + ''' default = SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT - if not ServiceManager().service_is_active("pacemaker.service"): + if not ServiceManager().service_is_active(constants.PCMK_SERVICE): return default value = utils.get_property("stonith-watchdog-timeout") return int(value.strip('s')) if value else default def 
_load_configurations(self): - """ + ''' Load necessary configurations for both disk-based/disk-less sbd - """ + ''' self.two_node_without_qdevice = utils.is_2node_cluster_without_qdevice() - dev_list = SBDManager.get_sbd_device_from_config() + dev_list = SBDUtils.get_sbd_device_from_config() if dev_list: # disk-based self.disk_based = True self.msgwait = SBDTimeout.get_sbd_msgwait(dev_list[0]) @@ -134,19 +261,19 @@ def _load_configurations(self): self.sbd_watchdog_timeout = SBDTimeout.get_sbd_watchdog_timeout() self.stonith_watchdog_timeout = SBDTimeout.get_stonith_watchdog_timeout() self.sbd_delay_start_value_expected = self.get_sbd_delay_start_expected() if utils.detect_virt() else "no" - self.sbd_delay_start_value_from_config = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + self.sbd_delay_start_value_from_config = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") logger.debug("Inspect SBDTimeout: %s", vars(self)) def get_stonith_timeout_expected(self): - """ + ''' Get stonith-timeout value for sbd cases, formulas are: value_from_sbd = 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd value_from_sbd = 1.2 * max (stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd stonith_timeout = max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus - """ + ''' if self.disk_based: value_from_sbd = int(1.2*(self.pcmk_delay_max + self.msgwait)) else: @@ -163,12 +290,12 @@ def get_stonith_timeout(cls): return cls_inst.get_stonith_timeout_expected() def get_sbd_delay_start_expected(self): - """ + ''' Get the value for SBD_DELAY_START, formulas are: SBD_DELAY_START = (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd SBD_DELAY_START = (token + consensus + 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd - """ + ''' token_and_consensus_timeout = corosync.token_and_consensus_timeout() if self.disk_based: value = token_and_consensus_timeout + self.pcmk_delay_max + self.msgwait @@ -178,34 +305,38 @@ def 
get_sbd_delay_start_expected(self): @staticmethod def get_sbd_delay_start_sec_from_sysconfig(): - """ + ''' Get suitable systemd start timeout for sbd.service - """ + ''' # TODO 5ms, 5us, 5s, 5m, 5h are also valid for sbd sysconfig - value = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + value = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") if utils.is_boolean_true(value): return 2*SBDTimeout.get_sbd_watchdog_timeout() return int(value) @staticmethod def is_sbd_delay_start(): - """ + ''' Check if SBD_DELAY_START is not no or not set - """ - res = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + ''' + res = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") return res and res != "no" + @staticmethod + def get_sbd_systemd_start_timeout() -> int: + cmd = "systemctl show -p TimeoutStartUSec sbd --value" + out = sh.cluster_shell().get_stdout_or_raise_error(cmd) + return utils.get_systemd_timeout_start_in_sec(out) + def adjust_systemd_start_timeout(self): - """ + ''' Adjust start timeout for sbd when set SBD_DELAY_START - """ - sbd_delay_start_value = SBDManager.get_sbd_value_from_config("SBD_DELAY_START") + ''' + sbd_delay_start_value = SBDUtils.get_sbd_value_from_config("SBD_DELAY_START") if sbd_delay_start_value == "no": return - cmd = "systemctl show -p TimeoutStartUSec sbd --value" - out = sh.cluster_shell().get_stdout_or_raise_error(cmd) - start_timeout = utils.get_systemd_timeout_start_in_sec(out) + start_timeout = SBDTimeout.get_sbd_systemd_start_timeout() if start_timeout > int(sbd_delay_start_value): return @@ -216,15 +347,15 @@ def adjust_systemd_start_timeout(self): utils.cluster_run_cmd("systemctl daemon-reload") def adjust_stonith_timeout(self): - """ + ''' Adjust stonith-timeout property - """ + ''' utils.set_property("stonith-timeout", self.get_stonith_timeout_expected(), conditional=True) def adjust_sbd_delay_start(self): - """ + ''' Adjust SBD_DELAY_START in /etc/sysconfig/sbd - """ + ''' expected_value = 
str(self.sbd_delay_start_value_expected) config_value = self.sbd_delay_start_value_from_config if expected_value == config_value: @@ -232,29 +363,23 @@ def adjust_sbd_delay_start(self): if expected_value == "no" \ or (not re.search(r'\d+', config_value)) \ or (int(expected_value) > int(config_value)): - SBDManager.update_configuration({"SBD_DELAY_START": expected_value}) + SBDManager.update_sbd_configuration({"SBD_DELAY_START": expected_value}) @classmethod def adjust_sbd_timeout_related_cluster_configuration(cls): - """ + ''' Adjust sbd timeout related configurations - """ + ''' cls_inst = cls() cls_inst._load_configurations() - - message = "Adjusting sbd related timeout values" - with logger_utils.status_long(message): - cls_inst.adjust_sbd_delay_start() - cls_inst.adjust_stonith_timeout() - cls_inst.adjust_systemd_start_timeout() + cls_inst.adjust_sbd_delay_start() + cls_inst.adjust_stonith_timeout() + cls_inst.adjust_systemd_start_timeout() -class SBDManager(object): - """ - Class to manage sbd configuration and services - """ +class SBDManager: SYSCONFIG_SBD_TEMPLATE = "/usr/share/fillup-templates/sysconfig.sbd" - SBD_STATUS_DESCRIPTION = """Configure SBD: + SBD_STATUS_DESCRIPTION = '''Configure SBD: If you have shared storage, for example a SAN or iSCSI target, you can use it avoid split-brain scenarios by configuring SBD. This requires a 1 MB partition, accessible to all nodes in the @@ -262,91 +387,172 @@ class SBDManager(object): across all nodes in the cluster, so /dev/disk/by-id/* devices are a good choice. Note that all data on the partition you specify here will be destroyed. -""" - SBD_WARNING = "Not configuring SBD - STONITH will be disabled." +''' + NO_SBD_WARNING = "Not configuring SBD - STONITH will be disabled." + DISKLESS_SBD_MIN_EXPECTED_VOTE = 3 DISKLESS_SBD_WARNING = "Diskless SBD requires cluster with three or more nodes. If you want to use diskless SBD for 2-node cluster, should be combined with QDevice." 
- PARSE_RE = "[; ]" - DISKLESS_CRM_CMD = "crm configure property stonith-enabled=true stonith-watchdog-timeout={} stonith-timeout={}" SBD_RA = "stonith:fence_sbd" SBD_RA_ID = "stonith-sbd" + SBD_DEVICE_MAX = 3 + + def __init__( + self, + device_list_to_init: typing.List[str] | None = None, + timeout_dict: typing.Dict[str, int] | None = None, + update_dict: typing.Dict[str, str] | None = None, + no_overwrite_dev_map: typing.Dict[str, bool] | None = None, + new_config: bool = False, + diskless_sbd: bool = False, + bootstrap_context: bootstrap.Context | None = None + ): + ''' + Init function which can be called from crm sbd subcommand or bootstrap + ''' + self.package_installed = utils.package_is_installed("sbd") + if not self.package_installed: + return - def __init__(self, context): - """ - Init function + self.device_list_to_init = device_list_to_init or [] + self.timeout_dict = timeout_dict or {} + self.update_dict = update_dict or {} + self.diskless_sbd = diskless_sbd + self.cluster_is_running = ServiceManager().service_is_active(constants.PCMK_SERVICE) + self.bootstrap_context = bootstrap_context + self.no_overwrite_dev_map = no_overwrite_dev_map or {} + self.new_config = new_config + + # From bootstrap init or join process, override the values + if self.bootstrap_context: + self.device_list_to_init = self.bootstrap_context.sbd_devices + self.diskless_sbd = self.bootstrap_context.diskless_sbd + self.cluster_is_running = self.bootstrap_context.cluster_is_running + + def _load_attributes_from_bootstrap(self): + if not self.bootstrap_context: + return + timeout_inst = SBDTimeout(self.bootstrap_context) + timeout_inst.initialize_timeout() + self.timeout_dict["watchdog"] = timeout_inst.sbd_watchdog_timeout + if not self.diskless_sbd: + self.timeout_dict["msgwait"] = timeout_inst.sbd_msgwait + self.update_dict["SBD_WATCHDOG_TIMEOUT"] = str(timeout_inst.sbd_watchdog_timeout) + self.update_dict["SBD_WATCHDOG_DEV"] = 
watchdog.Watchdog.get_watchdog_device(self.bootstrap_context.watchdog) + + @staticmethod + def convert_timeout_dict_to_opt_str(timeout_dict: typing.Dict[str, int]) -> str: + timeout_option_map = { + "watchdog": "-1", + "allocate": "-2", + "loop": "-3", + "msgwait": "-4" + } + return ' '.join([f"{timeout_option_map[k]} {v}" for k, v in timeout_dict.items() + if k in timeout_option_map]) + + def update_configuration(self) -> None: + ''' + Update and sync sbd configuration + ''' + if not self.update_dict: + return + if (self.bootstrap_context and self.bootstrap_context.type == "init") or self.new_config: + utils.copy_local_file(self.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD) + + for key, value in self.update_dict.items(): + logger.info("Update %s in %s: %s", key, SYSCONFIG_SBD, value) + utils.sysconfig_set(SYSCONFIG_SBD, **self.update_dict) + bootstrap.sync_file(SYSCONFIG_SBD) + logger.info("Already synced %s to all nodes", SYSCONFIG_SBD) + + @classmethod + def update_sbd_configuration(cls, update_dict: typing.Dict[str, str]) -> None: + inst = cls(update_dict=update_dict) + inst.update_configuration() + + def initialize_sbd(self): + if self.diskless_sbd: + logger.info("Configuring diskless SBD") + self._warn_diskless_sbd() + return + elif not all(self.no_overwrite_dev_map.values()): + logger.info("Configuring disk-based SBD") + + opt_str = SBDManager.convert_timeout_dict_to_opt_str(self.timeout_dict) + shell = sh.cluster_shell() + for dev in self.device_list_to_init: + # skip if device already initialized and not overwrite + if dev in self.no_overwrite_dev_map and self.no_overwrite_dev_map[dev]: + continue + logger.info("Initializing SBD device %s", dev) + cmd = f"sbd {opt_str} -d {dev} create" + logger.debug("Running command: %s", cmd) + shell.get_stdout_or_raise_error(cmd) - sbd_devices is provided by '-s' option on init process - diskless_sbd is provided by '-S' option on init process - """ - self.sbd_devices_input = context.sbd_devices - self.diskless_sbd = 
context.diskless_sbd - self._sbd_devices = None - self._watchdog_inst = None - self._context = context - self._delay_start = False - self.timeout_inst = None - self.no_overwrite_map = {} - self.no_update_config = False + SBDUtils.check_devices_metadata_consistent(self.device_list_to_init) @staticmethod - def _get_device_uuid(dev, node=None): - """ - Get UUID for specific device and node - """ - out = sh.cluster_shell().get_stdout_or_raise_error("sbd -d {} dump".format(dev), node) - res = re.search(r"UUID\s*:\s*(.*)\n", out) - if not res: - raise ValueError("Cannot find sbd device UUID for {}".format(dev)) - return res.group(1) + def enable_sbd_service(): + cluster_nodes = utils.list_cluster_nodes() or [utils.this_node()] + service_manager = ServiceManager() - def _compare_device_uuid(self, dev, node_list): - """ - Compare local sbd device UUID with other node's sbd device UUID - """ - if not node_list: + for node in cluster_nodes: + if not service_manager.service_is_enabled(constants.SBD_SERVICE, node): + logger.info("Enable %s on node %s", constants.SBD_SERVICE, node) + service_manager.enable_service(constants.SBD_SERVICE, node) + + @staticmethod + def restart_cluster_if_possible(): + if not ServiceManager().service_is_active(constants.PCMK_SERVICE): return - local_uuid = self._get_device_uuid(dev) - for node in node_list: - remote_uuid = self._get_device_uuid(dev, node) - if local_uuid != remote_uuid: - raise ValueError("Device {} doesn't have the same UUID with {}".format(dev, node)) - - def _verify_sbd_device(self, dev_list, compare_node_list=[]): - """ - Verify sbd device - """ - if len(dev_list) > 3: - raise ValueError("Maximum number of SBD device is 3") - for dev in dev_list: - if not utils.is_block_device(dev): - raise ValueError("{} doesn't look like a block device".format(dev)) - self._compare_device_uuid(dev, compare_node_list) + if xmlutil.CrmMonXmlParser().is_any_resource_running(): + logger.warning("Resource is running, need to restart cluster 
service manually on each node") + else: + bootstrap.restart_cluster() - def _no_overwrite_check(self, dev): - """ - Check if device already initialized and if need to overwrite - """ - return SBDManager.has_sbd_device_already_initialized(dev) and not bootstrap.confirm("SBD is already configured to use {} - overwrite?".format(dev)) + def configure_sbd(self): + ''' + Configure fence_sbd resource and related properties + ''' + if self.diskless_sbd: + utils.set_property("stonith-watchdog-timeout", SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT) + elif not xmlutil.CrmMonXmlParser().is_resource_configured(self.SBD_RA): + all_device_list = SBDUtils.get_sbd_device_from_config() + devices_param_str = f"params devices=\"{','.join(all_device_list)}\"" + cmd = f"crm configure primitive {self.SBD_RA_ID} {self.SBD_RA} {devices_param_str}" + sh.cluster_shell().get_stdout_or_raise_error(cmd) + utils.set_property("stonith-enabled", "true") - def _get_sbd_device_interactive(self): - """ + def _warn_diskless_sbd(self, peer=None): + ''' + Give warning when configuring diskless sbd + ''' + # When in sbd stage or join process + if (self.diskless_sbd and self.cluster_is_running) or peer: + vote_dict = utils.get_quorum_votes_dict(peer) + expected_vote = int(vote_dict.get('Expected', 0)) + if expected_vote < self.DISKLESS_SBD_MIN_EXPECTED_VOTE: + logger.warning(self.DISKLESS_SBD_WARNING) + # When in init process + elif self.diskless_sbd: + logger.warning(self.DISKLESS_SBD_WARNING) + + def get_sbd_device_interactive(self): + ''' Get sbd device on interactive mode - """ - if self._context.yes_to_all: - logger.warning(self.SBD_WARNING) + ''' + if self.bootstrap_context.yes_to_all: + logger.warning(self.NO_SBD_WARNING) return - logger.info(self.SBD_STATUS_DESCRIPTION) - if not bootstrap.confirm("Do you wish to use SBD?"): - logger.warning(self.SBD_WARNING) + logger.warning(self.NO_SBD_WARNING) return - configured_dev_list = self._get_sbd_device_from_config() - for dev in configured_dev_list: - 
self.no_overwrite_map[dev] = self._no_overwrite_check(dev) - if self.no_overwrite_map and all(self.no_overwrite_map.values()): - self.no_update_config = True - return configured_dev_list + configured_devices = SBDUtils.get_sbd_device_from_config() + for dev in configured_devices: + self.no_overwrite_dev_map[dev] = SBDUtils.no_overwrite_device_check(dev) + if self.no_overwrite_dev_map and all(self.no_overwrite_dev_map.values()): + return configured_devices dev_list = [] dev_looks_sane = False @@ -356,21 +562,20 @@ def _get_sbd_device_interactive(self): self.diskless_sbd = True return - dev_list = utils.re_split_string(self.PARSE_RE, dev) + dev_list = utils.re_split_string("[; ]", dev) try: - self._verify_sbd_device(dev_list) - except ValueError as err_msg: - logger.error(str(err_msg)) + SBDUtils.verify_sbd_device(dev_list) + except ValueError as e: + logger.error(e) continue - for dev in dev_list: - if dev not in self.no_overwrite_map: - self.no_overwrite_map[dev] = self._no_overwrite_check(dev) - if self.no_overwrite_map[dev]: + if dev not in self.no_overwrite_dev_map: + self.no_overwrite_dev_map[dev] = SBDUtils.no_overwrite_device_check(dev) + if self.no_overwrite_dev_map[dev]: if dev == dev_list[-1]: return dev_list continue - logger.warning("All data on {} will be destroyed!".format(dev)) + logger.warning("All data on %s will be destroyed", dev) if bootstrap.confirm('Are you sure you wish to use this device?'): dev_looks_sane = True else: @@ -379,248 +584,73 @@ def _get_sbd_device_interactive(self): return dev_list - def _get_sbd_device(self): - """ - Get sbd device from options or interactive mode - """ - dev_list = [] - if self.sbd_devices_input: - dev_list = self.sbd_devices_input - self._verify_sbd_device(dev_list) - for dev in dev_list: - self.no_overwrite_map[dev] = self._no_overwrite_check(dev) - if all(self.no_overwrite_map.values()) and dev_list == self._get_sbd_device_from_config(): - self.no_update_config = True + def 
get_sbd_device_from_bootstrap(self): + ''' + Handle sbd device input from 'crm cluster init' with -s or -S option + -s is for disk-based sbd + -S is for diskless sbd + ''' + # specified sbd device with -s option + if self.device_list_to_init: + self.update_dict["SBD_DEVICE"] = ';'.join(self.device_list_to_init) + # no -s and no -S option elif not self.diskless_sbd: - dev_list = self._get_sbd_device_interactive() - self._sbd_devices = dev_list - - def _initialize_sbd(self): - """ - Initialize SBD parameters according to profiles.yml, or the crmsh defined defaulst as the last resort. - This covers both disk-based-sbd, and diskless-sbd scenarios. - For diskless-sbd, set sbd_watchdog_timeout then return; - For disk-based-sbd, also calculate the msgwait value, then initialize the SBD device. - """ - msg = "" - if self.diskless_sbd: - msg = "Configuring diskless SBD" - elif not all(self.no_overwrite_map.values()): - msg = "Initializing SBD" - if msg: - logger.info(msg) - self.timeout_inst = SBDTimeout(self._context) - self.timeout_inst.initialize_timeout() - if self.diskless_sbd: - return - - opt = "-4 {} -1 {}".format(self.timeout_inst.sbd_msgwait, self.timeout_inst.sbd_watchdog_timeout) - - for dev in self._sbd_devices: - if dev in self.no_overwrite_map and self.no_overwrite_map[dev]: - continue - rc, _, err = bootstrap.invoke("sbd {} -d {} create".format(opt, dev)) - if not rc: - utils.fatal("Failed to initialize SBD device {}: {}".format(dev, err)) - - def _update_sbd_configuration(self): - """ - Update /etc/sysconfig/sbd - """ - if self.no_update_config: - bootstrap.sync_file(SYSCONFIG_SBD) + self.device_list_to_init = self.get_sbd_device_interactive() + + def init_and_deploy_sbd(self): + ''' + The process of deploying sbd includes: + 1. Initialize sbd device + 2. Write config file /etc/sysconfig/sbd + 3. Enable sbd.service + 4. Restart cluster service if possible + 5. 
Configure stonith-sbd resource and related properties + ''' + if not self.package_installed: return - utils.copy_local_file(self.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD) - sbd_config_dict = { - "SBD_WATCHDOG_DEV": self._watchdog_inst.watchdog_device_name, - "SBD_WATCHDOG_TIMEOUT": str(self.timeout_inst.sbd_watchdog_timeout) - } - if self._sbd_devices: - sbd_config_dict["SBD_DEVICE"] = ';'.join(self._sbd_devices) - utils.sysconfig_set(SYSCONFIG_SBD, **sbd_config_dict) - bootstrap.sync_file(SYSCONFIG_SBD) - - def _get_sbd_device_from_config(self): - """ - Gets currently configured SBD device, i.e. what's in /etc/sysconfig/sbd - """ - res = SBDManager.get_sbd_value_from_config("SBD_DEVICE") - if res: - return utils.re_split_string(self.PARSE_RE, res) - else: - return [] - - def _restart_cluster_and_configure_sbd_ra(self): - """ - Try to configure sbd resource, restart cluster on needed - """ - if not xmlutil.CrmMonXmlParser().is_any_resource_running(): - bootstrap.restart_cluster() - self.configure_sbd_resource_and_properties() - else: - logger.warning("To start sbd.service, need to restart cluster service manually on each node") - if self.diskless_sbd: - cmd = self.DISKLESS_CRM_CMD.format(self.timeout_inst.stonith_watchdog_timeout, SBDTimeout.get_stonith_timeout()) - logger.warning("Then run \"{}\" on any node".format(cmd)) - else: - self.configure_sbd_resource_and_properties() - - def _enable_sbd_service(self): - """ - Try to enable sbd service - """ - if self._context.cluster_is_running: - # in sbd stage, enable sbd.service on cluster wide - utils.cluster_run_cmd("systemctl enable sbd.service") - self._restart_cluster_and_configure_sbd_ra() - else: - # in init process - bootstrap.invoke("systemctl enable sbd.service") - - def _warn_diskless_sbd(self, peer=None): - """ - Give warning when configuring diskless sbd - """ - # When in sbd stage or join process - if (self.diskless_sbd and self._context.cluster_is_running) or peer: - vote_dict = 
utils.get_quorum_votes_dict(peer) - expected_vote = int(vote_dict['Expected']) - if (expected_vote < 2 and peer) or (expected_vote < 3 and not peer): - logger.warning(self.DISKLESS_SBD_WARNING) - # When in init process - elif self.diskless_sbd: - logger.warning(self.DISKLESS_SBD_WARNING) - - def sbd_init(self): - """ - Function sbd_init includes these steps: - 1. Get sbd device from options or interactive mode - 2. Initialize sbd device - 3. Write config file /etc/sysconfig/sbd - """ - from .watchdog import Watchdog - - if not utils.package_is_installed("sbd"): - return - self._watchdog_inst = Watchdog(_input=self._context.watchdog) - self._watchdog_inst.init_watchdog() - self._get_sbd_device() - if not self._sbd_devices and not self.diskless_sbd: - bootstrap.invoke("systemctl disable sbd.service") - return - self._warn_diskless_sbd() - self._initialize_sbd() - self._update_sbd_configuration() - self._enable_sbd_service() - - def configure_sbd_resource_and_properties(self): - """ - Configure stonith-sbd resource and related properties - """ - if not utils.package_is_installed("sbd") or \ - not ServiceManager().service_is_enabled("sbd.service") or \ - xmlutil.CrmMonXmlParser().is_resource_configured(self.SBD_RA): - return - shell = sh.cluster_shell() + if self.bootstrap_context: + self.get_sbd_device_from_bootstrap() + if not self.device_list_to_init and not self.diskless_sbd: + ServiceManager().disable_service(constants.SBD_SERVICE) + return + self._load_attributes_from_bootstrap() - # disk-based sbd - if self._get_sbd_device_from_config(): - devices_param_str = f"params devices=\"{','.join(self._sbd_devices)}\"" - cmd = f"crm configure primitive {self.SBD_RA_ID} {self.SBD_RA} {devices_param_str}" - shell.get_stdout_or_raise_error(cmd) - utils.set_property("stonith-enabled", "true") - # disk-less sbd - else: - if self.timeout_inst is None: - self.timeout_inst = SBDTimeout(self._context) - self.timeout_inst.initialize_timeout() - cmd = 
self.DISKLESS_CRM_CMD.format(self.timeout_inst.stonith_watchdog_timeout, constants.STONITH_TIMEOUT_DEFAULT) - shell.get_stdout_or_raise_error(cmd) + self.initialize_sbd() + self.update_configuration() + SBDManager.enable_sbd_service() - # in sbd stage - if self._context.cluster_is_running: + if self.cluster_is_running: + SBDManager.restart_cluster_if_possible() + self.configure_sbd() bootstrap.adjust_properties() def join_sbd(self, remote_user, peer_host): - """ + ''' Function join_sbd running on join process only On joining process, check whether peer node has enabled sbd.service If so, check prerequisites of SBD and verify sbd device on join node - """ - from .watchdog import Watchdog - - if not utils.package_is_installed("sbd"): + ''' + if not self.package_installed: return - if not os.path.exists(SYSCONFIG_SBD) or not ServiceManager().service_is_enabled("sbd.service", peer_host): - bootstrap.invoke("systemctl disable sbd.service") + + service_manager = ServiceManager() + if not os.path.exists(SYSCONFIG_SBD) or not service_manager.service_is_enabled(constants.SBD_SERVICE, peer_host): + service_manager.disable_service(constants.SBD_SERVICE) return + + from .watchdog import Watchdog self._watchdog_inst = Watchdog(remote_user=remote_user, peer_host=peer_host) self._watchdog_inst.join_watchdog() - dev_list = self._get_sbd_device_from_config() + + dev_list = SBDUtils.get_sbd_device_from_config() if dev_list: - self._verify_sbd_device(dev_list, [peer_host]) + SBDUtils.verify_sbd_device(dev_list, [peer_host]) else: self._warn_diskless_sbd(peer_host) - logger.info("Got {}SBD configuration".format("" if dev_list else "diskless ")) - bootstrap.invoke("systemctl enable sbd.service") - - @classmethod - def verify_sbd_device(cls): - """ - This classmethod is for verifying sbd device on a running cluster - Raise ValueError for exceptions - """ - inst = cls(bootstrap.Context()) - dev_list = inst._get_sbd_device_from_config() - if not dev_list: - raise ValueError("No sbd device 
configured") - inst._verify_sbd_device(dev_list, utils.list_cluster_nodes_except_me()) - - @classmethod - def get_sbd_device_from_config(cls): - """ - Get sbd device list from config - """ - inst = cls(bootstrap.Context()) - return inst._get_sbd_device_from_config() - @classmethod - def is_using_diskless_sbd(cls): - """ - Check if using diskless SBD - """ - inst = cls(bootstrap.Context()) - dev_list = inst._get_sbd_device_from_config() - if not dev_list and ServiceManager().service_is_active("sbd.service"): - return True - return False - - @staticmethod - def update_configuration(sbd_config_dict): - """ - Update and sync sbd configuration - """ - utils.sysconfig_set(SYSCONFIG_SBD, **sbd_config_dict) - bootstrap.sync_file(SYSCONFIG_SBD) - - @staticmethod - def get_sbd_value_from_config(key): - """ - Get value from /etc/sysconfig/sbd - """ - conf = utils.parse_sysconfig(SYSCONFIG_SBD) - res = conf.get(key) - return res - - @staticmethod - def has_sbd_device_already_initialized(dev): - """ - Check if sbd device already initialized - """ - cmd = "sbd -d {} dump".format(dev) - rc, _, _ = ShellUtils().get_stdout_stderr(cmd) - return rc == 0 + logger.info("Got {}SBD configuration".format("" if dev_list else "diskless ")) + service_manager.enable_service(constants.SBD_SERVICE) def clean_up_existing_sbd_resource(): @@ -628,5 +658,40 @@ def clean_up_existing_sbd_resource(): sbd_id_list = xmlutil.CrmMonXmlParser().get_resource_id_list_via_type(SBDManager.SBD_RA) if xmlutil.CrmMonXmlParser().is_resource_started(SBDManager.SBD_RA): for sbd_id in sbd_id_list: + logger.info("Stop sbd resource '%s'(%s)", sbd_id, SBDManager.SBD_RA) utils.ext_cmd("crm resource stop {}".format(sbd_id)) + logger.info("Remove sbd resource '%s'", ';' .join(sbd_id_list)) utils.ext_cmd("crm configure delete {}".format(' '.join(sbd_id_list))) + + +def enable_sbd_on_cluster(): + cluster_nodes = utils.list_cluster_nodes() + service_manager = ServiceManager() + for node in cluster_nodes: + if not 
service_manager.service_is_enabled(constants.SBD_SERVICE, node): + logger.info("Enable %s on node %s", constants.SBD_SERVICE, node) + service_manager.enable_service(constants.SBD_SERVICE, node) + + +def disable_sbd_from_cluster(): + ''' + Disable SBD from cluster, the process includes: + - stop and remove sbd agent + - disable sbd.service + - adjust cluster attributes + - adjust related timeout values + ''' + clean_up_existing_sbd_resource() + + cluster_nodes = utils.list_cluster_nodes() + service_manager = ServiceManager() + for node in cluster_nodes: + if service_manager.service_is_enabled(constants.SBD_SERVICE, node): + logger.info("Disable %s on node %s", constants.SBD_SERVICE, node) + service_manager.disable_service(constants.SBD_SERVICE, node) + + out = sh.cluster_shell().get_stdout_or_raise_error("stonith_admin -L") + res = re.search("([0-9]+) fence device[s]* found", out) + # after disable sbd.service, check if sbd is the last stonith device + if res and int(res.group(1)) <= 1: + utils.cleanup_stonith_related_properties() diff --git a/crmsh/ui_root.py b/crmsh/ui_root.py index 61a8be882d..7e35586b89 100644 --- a/crmsh/ui_root.py +++ b/crmsh/ui_root.py @@ -33,6 +33,7 @@ from . import ui_resource from . import ui_script from . import ui_site +from . 
import ui_sbd class Root(command.UI): @@ -150,6 +151,10 @@ def do_report(self, context, *args): def do_resource(self): pass + @command.level(ui_sbd.SBD) + def do_sbd(self): + pass + @command.level(ui_script.Script) @command.help('''Cluster scripts Cluster scripts can perform cluster-wide configuration, diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py new file mode 100644 index 0000000000..2a27690eb6 --- /dev/null +++ b/crmsh/ui_sbd.py @@ -0,0 +1,481 @@ +import logging +import typing +import re +import os + +from crmsh import sbd +from crmsh import watchdog +from crmsh import command +from crmsh import utils +from crmsh import bootstrap +from crmsh import completers +from crmsh import sh +from crmsh import xmlutil +from crmsh import constants +from crmsh.service_manager import ServiceManager +from crmsh.bootstrap import SYSCONFIG_SBD + + +logger = logging.getLogger(__name__) + + +def sbd_devices_completer(completed_list: typing.List[str]) -> typing.List[str]: + ''' + completion for sbd devices + ''' + if not ServiceManager().service_is_active(constants.SBD_SERVICE): + return [] + dev_list = sbd.SBDUtils.get_sbd_device_from_config() + if dev_list: + return [dev for dev in dev_list if dev not in completed_list] + return [] + + +def sbd_configure_completer(completed_list: typing.List[str]) -> typing.List[str]: + ''' + completion for sbd configure command + ''' + if not ServiceManager().service_is_active(constants.PCMK_SERVICE): + return [] + sbd_service_is_enabled = service_manager.service_is_enabled(constants.SBD_SERVICE) + dev_list = sbd.SBDUtils.get_sbd_device_from_config() + # Show disk-based sbd configure options + # if there are devices in config or sbd.service is not enabled + is_diskbased = bool(dev_list) or not sbd_service_is_enabled + + parameters_pool = [] + if completed_list[1] == '': + parameters_pool = ["show"] + elif completed_list[1] == "show": + if len(completed_list) == 3: + show_types = SBD.SHOW_TYPES if is_diskbased else SBD.DISKLESS_SHOW_TYPES + 
return [t for t in show_types if t not in completed_list] + else: + return [] + if completed_list[-1] == "device=": + return [] + + timeout_types = SBD.TIMEOUT_TYPES if is_diskbased else SBD.DISKLESS_TIMEOUT_TYPES + parameters_pool.extend([f"{t}-timeout=" for t in timeout_types]) + parameters_pool.append("watchdog-device=") + parameters_pool = [ + p + for p in parameters_pool + if not any(c.startswith(p) for c in completed_list) + ] + + if is_diskbased: + dev_count = sum(1 for c in completed_list if c.startswith("device=")) + if dev_count < sbd.SBDManager.SBD_DEVICE_MAX: + parameters_pool.append("device=") + + return parameters_pool + + +class SBD(command.UI): + ''' + Class for sbd sub-level + + Includes commands: + - sbd configure + - sbd remove + - sbd status + ''' + name = "sbd" + TIMEOUT_TYPES = ("watchdog", "allocate", "loop", "msgwait") + DISKLESS_TIMEOUT_TYPES = ("watchdog",) + SHOW_TYPES = ("disk_metadata", "sysconfig", "property") + DISKLESS_SHOW_TYPES = ("sysconfig", "property") + RESTART_INFO = "Requires to restart cluster service to take effect" + PCMK_ATTRS = ( + "have-watchdog", + "stonith-timeout", + "stonith-watchdog-timeout", + "stonith-enabled", + "priority-fencing-delay", + "pcmk_delay_max" + ) + PARSE_RE = re.compile( + # Match "device" key with any value, including empty + r'(device)=("[^"]*"|[\w/\d;]*)' + # Match other keys with non-empty values, capturing possible suffix + r'|(\w+)(?:-(\w+))?=("[^"]+"|[\w/\d;]+)' + # Match standalone device path + r'|(/dev/[\w\d]+)' + ) + + class SyntaxError(Exception): + pass + + def __init__(self): + self.device_list_from_config: list[str] = None + self.device_meta_dict_runtime: dict[str, int] = None + self.watchdog_timeout_from_config: int = None + self.watchdog_device_from_config: str = None + self.service_manager: ServiceManager = None + self.cluster_shell: sh.cluster_shell = None + self.cluster_nodes: list[str] = None + self.crm_mon_xml_parser: xmlutil.CrmMonXmlParser = None + + 
command.UI.__init__(self) + + def _load_attributes(self): + self.device_list_from_config = sbd.SBDUtils.get_sbd_device_from_config() + self.device_meta_dict_runtime = {} + if self.device_list_from_config: + self.device_meta_dict_runtime = sbd.SBDUtils.get_sbd_device_metadata(self.device_list_from_config[0], timeout_only=True) + try: + self.watchdog_timeout_from_config = sbd.SBDTimeout.get_sbd_watchdog_timeout() + except: + self.watchdog_timeout_from_config = None + self.watchdog_device_from_config = watchdog.Watchdog.get_watchdog_device_from_sbd_config() + + self.service_manager = ServiceManager() + self.cluster_shell = sh.cluster_shell() + self.cluster_nodes = utils.list_cluster_nodes() or [utils.this_node()] + self.crm_mon_xml_parser = xmlutil.CrmMonXmlParser() + + def requires(self) -> bool: + ''' + Requirements check when entering sbd sub-level + ''' + if not utils.package_is_installed("sbd"): + logger.error("sbd is not installed") + return False + return True + + @property + def configure_usage(self) -> str: + ''' + Build usage string for sbd configure command, + including disk-based and diskless sbd cases + ''' + def build_timeout_usage_str(timeout_types: tuple[str]) -> str: + return " ".join([f"[{t}-timeout=]" for t in timeout_types]) + timeout_usage_str = build_timeout_usage_str(self.TIMEOUT_TYPES) + timeout_usage_str_diskless = build_timeout_usage_str(self.DISKLESS_TIMEOUT_TYPES) + show_usage_str = f"[{'|'.join(self.SHOW_TYPES)}]" + show_usage_str_diskless = f"[{'|'.join(self.DISKLESS_SHOW_TYPES)}]" + return ("Usage for disk-based SBD:\n" + f"crm sbd configure show {show_usage_str}\n" + f"crm sbd configure [device=]... 
{timeout_usage_str} [watchdog-device=]\n\n" + "Usage for diskless SBD:\n" + f"crm sbd configure show {show_usage_str_diskless}\n" + f"crm sbd configure device=\"\" {timeout_usage_str_diskless} [watchdog-device=]\n") + + @staticmethod + def _show_sysconfig() -> None: + ''' + Show pure content of /etc/sysconfig/sbd + ''' + with open(SYSCONFIG_SBD) as f: + content_list = [line.strip() for line in f.readlines() + if not line.startswith("#") + and line.strip()] + for line in content_list: + print(line) + + def _show_disk_metadata(self) -> None: + ''' + Show sbd disk metadata for each configured device + ''' + for dev in self.device_list_from_config: + print(self.cluster_shell.get_stdout_or_raise_error(f"sbd -d {dev} dump")) + print() + + def _show_property(self) -> None: + ''' + Show sbd-related properties from cluster and systemd + ''' + if self.service_manager.service_is_active(constants.PCMK_SERVICE): + cmd = "crm configure show" + else: # static case + cib_path = os.getenv("CIB_file", constants.CIB_RAW_FILE) + if not os.path.exists(cib_path): + return + cmd = f"CIB_file={cib_path} crm configure show" + out = self.cluster_shell.get_stdout_or_raise_error(cmd) + + regex = f"({'|'.join(self.PCMK_ATTRS)})=([^\s]+)" + matches = re.findall(regex, out) + for match in matches: + print(f"{match[0]}={match[1]}") + + systemd_start_timeout = sbd.SBDTimeout.get_sbd_systemd_start_timeout() + print(f"TimeoutStartUSec={systemd_start_timeout}") + + def _configure_show(self, args) -> None: + if len(args) > 2: + raise self.SyntaxError("Invalid argument") + elif len(args) == 2: + match args[1]: + case "disk_metadata": + self._show_disk_metadata() + case "sysconfig": + SBD._show_sysconfig() + case "property": + self._show_property() + case _: + raise self.SyntaxError(f"Unknown argument: {args[1]}") + else: + self._show_disk_metadata() + if self.device_list_from_config: + print() + SBD._show_sysconfig() + print() + self._show_property() + + def _parse_args(self, args: typing.List[str]) -> 
dict[str, int|str|list[str]]: + ''' + Parse arguments and verify them + + Possible arguments format like: + device="/dev/sdb5;/dev/sda6" + device="" watchdog-timeout=10 + /dev/sda5 watchdog-timeout=10 watchdog-device=/dev/watchdog + device=/dev/sdb5 device=/dev/sda6 watchdog-timeout=10 msgwait-timeout=20 + ''' + parameter_dict = {"device-list": []} + + for arg in args: + match = self.PARSE_RE.match(arg) + if not match: + raise self.SyntaxError(f"Invalid argument: {arg}") + device_key, device_value, key, suffix, value, device_path = match.groups() + + # device= parameter + if device_key: + if device_value: + parameter_dict.setdefault("device-list", []).extend(device_value.split(";")) + # explicitly set empty value, stands for diskless sbd + elif not parameter_dict.get("device-list"): + parameter_dict.pop("device-list", None) + # standalone device parameter + elif device_path: + parameter_dict.setdefault("device-list", []).append(device_path) + # timeout related parameters + elif key in self.TIMEOUT_TYPES and suffix and suffix == "timeout": + if not value.isdigit(): + raise self.SyntaxError(f"Invalid timeout value: {value}") + parameter_dict[key] = int(value) + # watchdog device parameter + elif key == "watchdog" and suffix == "device": + parameter_dict["watchdog-device"] = value + else: + raise self.SyntaxError(f"Unknown argument: {arg}") + + watchdog_device = parameter_dict.get("watchdog-device") + parameter_dict["watchdog-device"] = watchdog.Watchdog.get_watchdog_device(watchdog_device) + + logger.debug("Parsed arguments: %s", parameter_dict) + return parameter_dict + + @staticmethod + def _adjust_timeout_dict(timeout_dict: dict, diskless: bool = False) -> dict: + watchdog_timeout = timeout_dict.get("watchdog") + if not watchdog_timeout: + watchdog_timeout, _ = sbd.SBDTimeout.get_advised_sbd_timeout(diskless) + logger.info("No watchdog timeout specified, use advised value: %s", watchdog_timeout) + timeout_dict["watchdog"] = watchdog_timeout + + if diskless: + 
return timeout_dict + + msgwait_timeout = timeout_dict.get("msgwait") + if not msgwait_timeout: + msgwait_timeout = 2*watchdog_timeout + logger.info("No msgwait timeout specified, use 2*watchdog timeout: %s", msgwait_timeout) + timeout_dict["msgwait"] = msgwait_timeout + + if msgwait_timeout < 2*watchdog_timeout: + logger.warning("It's recommended to set msgwait timeout >= 2*watchdog timeout") + + return timeout_dict + + def _configure_diskbase(self, parameter_dict: dict): + ''' + Configure disk-based SBD based on input parameters and runtime config + ''' + if not self.device_list_from_config: + self.watchdog_timeout_from_config = None + self.watchdog_device_from_config = None + + update_dict = {} + device_list = parameter_dict.get("device-list", []) + if not device_list and not self.device_list_from_config: + raise self.SyntaxError("No device specified") + if len(device_list) > len(set(device_list)): + raise self.SyntaxError("Duplicate device") + watchdog_device = parameter_dict.get("watchdog-device") + if watchdog_device != self.watchdog_device_from_config: + update_dict["SBD_WATCHDOG_DEV"] = watchdog_device + timeout_dict = {k: v for k, v in parameter_dict.items() if k in self.TIMEOUT_TYPES} + + all_device_list = list( + dict.fromkeys(self.device_list_from_config + device_list) + ) + sbd.SBDUtils.verify_sbd_device(all_device_list) + + new_device_list = list( + set(device_list) - set(self.device_list_from_config) + ) + no_overwrite_dev_map : dict[str, bool] = { + dev: sbd.SBDUtils.no_overwrite_device_check(dev) for dev in new_device_list + } + if new_device_list: + update_dict["SBD_DEVICE"] = ";".join(all_device_list) + + device_list_to_init = [] + # initialize new devices only if no timeout parameter specified or timeout parameter is already in runtime config + if not timeout_dict or utils.is_subdict(timeout_dict, self.device_meta_dict_runtime): + device_list_to_init = new_device_list + # initialize all devices + else: + device_list_to_init = all_device_list + + 
# merge runtime timeout dict with new timeout dict + timeout_dict = self.device_meta_dict_runtime | timeout_dict + # adjust watchdog and msgwait timeout + timeout_dict = self._adjust_timeout_dict(timeout_dict) + watchdog_timeout = timeout_dict.get("watchdog") + if watchdog_timeout != self.watchdog_timeout_from_config: + update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) + + sbd_manager = sbd.SBDManager( + device_list_to_init=device_list_to_init, + timeout_dict=timeout_dict, + update_dict=update_dict, + no_overwrite_dev_map=no_overwrite_dev_map, + new_config=False if self.device_list_from_config else True + ) + sbd_manager.init_and_deploy_sbd() + + def _configure_diskless(self, parameter_dict: dict): + ''' + Configure diskless SBD based on input parameters and runtime config + ''' + if self.device_list_from_config: + self.watchdog_timeout_from_config = None + self.watchdog_device_from_config = None + + update_dict = {} + parameter_dict = self._adjust_timeout_dict(parameter_dict, diskless=True) + watchdog_timeout = parameter_dict.get("watchdog") + if watchdog_timeout and watchdog_timeout != self.watchdog_timeout_from_config: + update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) + watchdog_device = parameter_dict.get("watchdog-device") + if watchdog_device != self.watchdog_device_from_config: + update_dict["SBD_WATCHDOG_DEV"] = watchdog_device + + sbd_manager = sbd.SBDManager( + update_dict=update_dict, + diskless_sbd=True, + new_config=True if self.device_list_from_config else False + ) + sbd_manager.init_and_deploy_sbd() + + @command.completers_repeating(sbd_configure_completer) + def do_configure(self, context, *args) -> bool: + ''' + Implement sbd configure command + ''' + self._load_attributes() + + try: + if not args: + raise self.SyntaxError("No argument") + + if args[0] == "show": + self._configure_show(args) + return True + + if not self.service_manager.service_is_active(constants.PCMK_SERVICE): + logger.error("%s is not active", 
constants.PCMK_SERVICE) + return False + + parameter_dict = self._parse_args(args) + # disk-based sbd case + if "device-list" in parameter_dict: + return self._configure_diskbase(parameter_dict) + # diskless sbd case + else: + return self._configure_diskless(parameter_dict) + + except self.SyntaxError as e: + logger.error(str(e)) + print(self.configure_usage) + return False + + @command.completers_repeating(sbd_devices_completer) + def do_remove(self, context, *args) -> bool: + ''' + Implement sbd remove command + ''' + self._load_attributes() + + if not self.service_manager.service_is_active(constants.SBD_SERVICE): + logger.error("%s is not active", constants.SBD_SERVICE) + return False + + parameter_dict = self._parse_args(args) + dev_list = parameter_dict.get("device-list", []) + if dev_list: + if not self.device_list_from_config: + logger.error("No sbd device found in config") + return False + for dev in dev_list: + if dev not in self.device_list_from_config: + logger.error("Device %s is not in config", dev) + return False + changed_dev_list = set(self.device_list_from_config) - set(dev_list) + # remove part of devices from config + if changed_dev_list: + logger.info("Remove '%s' from %s", ";".join(dev_list), SYSCONFIG_SBD) + sbd.SBDManager.update_sbd_configuration({"SBD_DEVICE": ";".join(changed_dev_list)}) + # remove all devices, equivalent to stop sbd.service + else: + sbd.disable_sbd_from_cluster() + else: + sbd.disable_sbd_from_cluster() + + logger.info(self.RESTART_INFO) + return True + + def do_status(self, context) -> bool: + ''' + Implement sbd status command + ''' + self._load_attributes() + + print(f"{constants.SBD_SERVICE} status: (active|enabled|since)") + for node in self.cluster_nodes: + is_active = self.service_manager.service_is_active(constants.SBD_SERVICE, node) + is_active_str = "YES" if is_active else "NO" + is_enabled = self.service_manager.service_is_enabled(constants.SBD_SERVICE, node) + is_enabled_str = "YES" if is_enabled else "NO" + 
systemd_property = "ActiveEnterTimestamp" if is_active else "ActiveExitTimestamp" + since_str_prefix = "active since" if is_active else "disactive since" + systemctl_show_cmd = f"systemctl show {constants.SBD_SERVICE} --property={systemd_property} --value" + since = self.cluster_shell.get_stdout_or_raise_error(systemctl_show_cmd, node) or "N/A" + print(f"{node}: {is_active_str:<4}|{is_enabled_str:<4}|{since_str_prefix}: {since}") + print() + + print("watchdog info: (device|driver|kernel timeout)") + watchdog_sbd_re = "\[[0-9]+\] (/dev/.*)\nIdentity: Busy: .*sbd.*\nDriver: (.*)" + for node in self.cluster_nodes: + out = self.cluster_shell.get_stdout_or_raise_error("sbd query-watchdog", node) + res = re.search(watchdog_sbd_re, out) + if res: + device, driver = res.groups() + kernel_timeout = self.cluster_shell.get_stdout_or_raise_error("cat /proc/sys/kernel/watchdog_thresh", node) + print(f"{node}: {device}|{driver}|{kernel_timeout}") + else: + logger.error("Failed to get watchdog info from %s", node) + print() + + if self.crm_mon_xml_parser.is_resource_configured(sbd.SBDManager.SBD_RA): + print("fence_sbd status: ") + sbd_id_list = self.crm_mon_xml_parser.get_resource_id_list_via_type(sbd.SBDManager.SBD_RA) + for sbd_id in sbd_id_list: + out = self.cluster_shell.get_stdout_or_raise_error(f"crm resource status {sbd_id}") + print(out) diff --git a/crmsh/utils.py b/crmsh/utils.py index 5b0b193404..bad90364d2 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -2520,7 +2520,7 @@ def has_stonith_running(): from . 
import sbd out = sh.cluster_shell().get_stdout_or_raise_error("stonith_admin -L") has_stonith_device = re.search("[1-9]+ fence device[s]* found", out) is not None - using_diskless_sbd = sbd.SBDManager.is_using_diskless_sbd() + using_diskless_sbd = sbd.SBDUtils.is_using_diskless_sbd() return has_stonith_device or using_diskless_sbd @@ -2780,13 +2780,15 @@ def get_pcmk_delay_max(two_node_without_qdevice=False): return 0 -def get_property(name, property_type="crm_config", peer=None): +def get_property(name, property_type="crm_config", peer=None, get_default=True): """ Get cluster properties "property_type" can be crm_config|rsc_defaults|op_defaults + "get_default" is used to get the default value from cluster metadata, + when it is False, the property value will be got from cib """ - if property_type == "crm_config": + if property_type == "crm_config" and get_default: cib_path = os.getenv('CIB_file', constants.CIB_RAW_FILE) cmd = "CIB_file={} sudo --preserve-env=CIB_file crm configure get_property {}".format(cib_path, name) else: @@ -3153,7 +3155,7 @@ def ssh_command(): def load_cib_file_env(): - if options.regression_tests or ServiceManager().service_is_active("pacemaker.service"): + if options.regression_tests or ServiceManager().service_is_active(constants.PCMK_SERVICE): return cib_file = os.environ.setdefault('CIB_file', constants.CIB_RAW_FILE) logger.warning("Cluster is not running, loading the CIB file from %s", cib_file) @@ -3195,4 +3197,19 @@ def fuzzy_match(rx): if m: return m return None + + +def cleanup_stonith_related_properties(): + for p in ("stonith-watchdog-timeout", "stonith-timeout", "priority-fencing-delay"): + if get_property(p, get_default=False): + delete_property(p) + if get_property("stonith-enabled") == "true": + set_property("stonith-enabled", "false") + + +def is_subdict(sub_dict, main_dict): + """ + Check if sub_dict is a sub-dictionary of main_dict + """ + return all(item in main_dict.items() for item in sub_dict.items()) # 
vim:ts=4:sw=4:et: diff --git a/crmsh/watchdog.py b/crmsh/watchdog.py index 5af77a2f10..91a76a6103 100644 --- a/crmsh/watchdog.py +++ b/crmsh/watchdog.py @@ -27,7 +27,7 @@ def watchdog_device_name(self): return self._watchdog_device_name @staticmethod - def _verify_watchdog_device(dev, ignore_error=False): + def verify_watchdog_device(dev, ignore_error=False): """ Use wdctl to verify watchdog device """ @@ -48,7 +48,7 @@ def _load_watchdog_driver(driver): invoke("systemctl restart systemd-modules-load") @staticmethod - def _get_watchdog_device_from_sbd_config(): + def get_watchdog_device_from_sbd_config(): """ Try to get watchdog device name from sbd config file """ @@ -81,7 +81,7 @@ def _get_device_through_driver(self, driver_name): Get watchdog device name which has driver_name """ for device, driver in self._watchdog_info_dict.items(): - if driver == driver_name and self._verify_watchdog_device(device): + if driver == driver_name and self.verify_watchdog_device(device): return device return None @@ -108,7 +108,7 @@ def _get_first_unused_device(self): Get first unused watchdog device name """ for dev in self._watchdog_info_dict: - if self._verify_watchdog_device(dev, ignore_error=True): + if self.verify_watchdog_device(dev, ignore_error=True): return dev return None @@ -120,8 +120,8 @@ def _set_input(self): 3. 
Set the self._input as softdog """ if not self._input: - dev = self._get_watchdog_device_from_sbd_config() - if dev and self._verify_watchdog_device(dev, ignore_error=True): + dev = self.get_watchdog_device_from_sbd_config() + if dev and self.verify_watchdog_device(dev, ignore_error=True): self._input = dev return first_unused = self._get_first_unused_device() @@ -131,7 +131,7 @@ def _valid_device(self, dev): """ Is an unused watchdog device """ - if dev in self._watchdog_info_dict and self._verify_watchdog_device(dev): + if dev in self._watchdog_info_dict and self.verify_watchdog_device(dev): return True return False @@ -142,7 +142,7 @@ def join_watchdog(self): """ self._set_watchdog_info() - res = self._get_watchdog_device_from_sbd_config() + res = self.get_watchdog_device_from_sbd_config() if not res: utils.fatal("Failed to get watchdog device from {}".format(SYSCONFIG_SBD)) self._input = res @@ -177,3 +177,9 @@ def init_watchdog(self): if res: self._watchdog_device_name = res return + + @classmethod + def get_watchdog_device(cls, dev_or_driver=None): + w = cls(_input=dev_or_driver) + w.init_watchdog() + return w.watchdog_device_name From 1f56ed8db7fe35c0a7d9376016c44571e10476c2 Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 13 Aug 2024 21:44:37 +0800 Subject: [PATCH 03/36] Dev: behave: Adjust functional test for previous changes --- test/features/bootstrap_sbd_delay.feature | 12 ++++++++---- test/features/bootstrap_sbd_normal.feature | 4 +++- test/features/steps/step_implementation.py | 10 +++++----- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/test/features/bootstrap_sbd_delay.feature b/test/features/bootstrap_sbd_delay.feature index b8d1970c4b..3c3250c753 100644 --- a/test/features/bootstrap_sbd_delay.feature +++ b/test/features/bootstrap_sbd_delay.feature @@ -18,8 +18,10 @@ Feature: configure sbd delay start correctly And SBD option "SBD_DELAY_START" value is "no" And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" And SBD option 
"msgwait" value for "/dev/sda1" is "30" - # calculated and set by sbd RA - And Cluster property "stonith-timeout" is "43" + # original value is 43, which is calculated by external/sbd RA + # now fence_sbd doesn't calculate it, so this value is the default one + # from pacemaker + And Cluster property "stonith-timeout" is "60" And Parameter "pcmk_delay_max" not configured in "stonith-sbd" Given Has disk "/dev/sda1" on "hanode2" @@ -112,8 +114,10 @@ Feature: configure sbd delay start correctly And SBD option "SBD_DELAY_START" value is "no" And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" And SBD option "msgwait" value for "/dev/sda1" is "120" - # calculated and set by sbd RA - And Cluster property "stonith-timeout" is "172" + # original value is 172, which is calculated by external/sbd RA + # now fence_sbd doesn't calculate it, so this value is the default one + # from pacemaker + And Cluster property "stonith-timeout" is "60" And Parameter "pcmk_delay_max" not configured in "stonith-sbd" Given Has disk "/dev/sda1" on "hanode2" diff --git a/test/features/bootstrap_sbd_normal.feature b/test/features/bootstrap_sbd_normal.feature index fe73377f54..d5b2e45610 100644 --- a/test/features/bootstrap_sbd_normal.feature +++ b/test/features/bootstrap_sbd_normal.feature @@ -139,7 +139,7 @@ Feature: crmsh bootstrap sbd management And Online nodes are "hanode1 hanode2" When Run "crm configure primitive d Dummy op monitor interval=3s" on "hanode1" When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1" - Then Expected "WARNING: To start sbd.service, need to restart cluster service manually on each node" in stderr + Then Expected "WARNING: Resource is running, need to restart cluster service manually on each node" in stderr Then Service "sbd" is "stopped" on "hanode1" And Service "sbd" is "stopped" on "hanode2" When Run "crm cluster restart" on "hanode1" @@ -263,6 +263,8 @@ Feature: crmsh bootstrap sbd management And Resource "stonith-sbd" type "fence_sbd" is "Started" And 
Run "ps -ef|grep -v grep|grep 'watcher: /dev/sda1 '" OK + When Try "crm cluster init sbd -s /dev/sda2 -y" + Then Except "ERROR: cluster.init: Can't configure stage sbd: sbd.service already running! Please use crm option '-F' if need to redeploy" When Run "crm -F cluster init sbd -s /dev/sda2 -y" on "hanode1" Then Service "sbd" is "started" on "hanode1" And Service "sbd" is "started" on "hanode2" diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py index b1c4c2a8db..47b1383930 100644 --- a/test/features/steps/step_implementation.py +++ b/test/features/steps/step_implementation.py @@ -439,7 +439,7 @@ def step_impl(context, res_id, node): @then('SBD option "{key}" value is "{value}"') def step_impl(context, key, value): - res = sbd.SBDManager.get_sbd_value_from_config(key) + res = sbd.SBDUtils.get_sbd_value_from_config(key) assert_eq(value, res) @@ -453,27 +453,27 @@ def step_impl(context, key, dev, value): def step_impl(context, key, value): res = crmutils.get_property(key) assert res is not None - assert_eq(value, str(res)) + assert_eq(value.strip('s'), str(res).strip('s')) @then('Property "{key}" in "{type}" is "{value}"') def step_impl(context, key, type, value): res = crmutils.get_property(key, type) assert res is not None - assert_eq(value, str(res)) + assert_eq(value.strip('s'), str(res).strip('s')) @then('Parameter "{param_name}" not configured in "{res_id}"') def step_impl(context, param_name, res_id): _, out, _ = run_command(context, "crm configure show {}".format(res_id)) - result = re.search("params {}=".format(param_name), out) + result = re.search("params .*{}=".format(param_name), out) assert result is None @then('Parameter "{param_name}" configured in "{res_id}"') def step_impl(context, param_name, res_id): _, out, _ = run_command(context, "crm configure show {}".format(res_id)) - result = re.search("params {}=".format(param_name), out) + result = re.search("params .*{}=".format(param_name), out) assert 
result is not None From c5adeed04485665469139c9658a989f5d3353a1d Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 20 Aug 2024 07:25:16 +0800 Subject: [PATCH 04/36] Dev: doc: Add help info for crm sbd sublevel --- doc/crm.8.adoc | 67 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/doc/crm.8.adoc b/doc/crm.8.adoc index fd62df771f..e887259fae 100644 --- a/doc/crm.8.adoc +++ b/doc/crm.8.adoc @@ -1,5 +1,5 @@ :man source: crm -:man version: 4.6.0 +:man version: 5.0.0 :man manual: crmsh documentation crm(8) @@ -2104,6 +2104,71 @@ Example: utilization xen1 set memory 4096 ............... +[[cmdhelp.sbd,SBD management]] +=== `sbd` - SBD management + +This level is for managing the SBD (STONITH Block Device) daemon. + +[[cmdhelp.sbd.configure,configure SBD]] +==== `configure` + +Configure the SBD daemon for both disk-based and disk-less mode. + +Main functionalities include: +- Show configured disk metadata +- Show contents of /etc/sysconfig/sbd +- Show SBD related cluster properties +- Newly setup SBD configuration on a running cluster +- Update the existing parameters +- Add more devices to the existing disk-based SBD configuration + +For more details on SBD and related parameters, please see man sbd(8). + +Usage: +............... +# For disk-based SBD +crm sbd configure show [disk_metadata|sysconfig|property] +crm sbd configure [device=]... [watchdog-device=] [watchdog-timeout=] [allocate-timeout=] [loop-timeout=] [msgwait-timeout=] + +# For disk-less SBD +crm sbd configure show [sysconfig|property] +crm sbd configure device="" [watchdog-device=] [watchdog-timeout=] +............... + +example: +............... 
+configure show +configure show disk_metadata +configure show sysconfig +configure show property +configure device="/dev/sdb1;/dev/sdb2" +configure device=/dev/sdb1 device=/dev/sdb2 +configure device=/dev/sdb1 watchdog-timeout=30 msgwait-timeout=60 +configure device="" watchdog-timeout=30 +............... + +[[cmdhelp.sbd.status,show SBD status]] +==== `status` + +Show the status of the SBD daemon. + +Usage: +............... +status +............... + +[[cmdhelp.sbd.remove,remove SBD configuration]] +==== `remove` + +Remove part of devices from the SBD configuration, or remove SBD +service from the cluster. + +Usage: +............... +remove +remove [ ...] +............... + [[cmdhelp.node,Node management]] === `node` - Node management From 4c93b3d1f53024281f6bb7831396cf85b9d55b5b Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 11 Sep 2024 10:11:42 +0800 Subject: [PATCH 05/36] Dev: ui_sbd: Add property/sysconfig section header for sbd configure show --- crmsh/ui_sbd.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 2a27690eb6..5d6075d2a7 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -172,6 +172,8 @@ def _show_sysconfig() -> None: content_list = [line.strip() for line in f.readlines() if not line.startswith("#") and line.strip()] + if content_list: + logger.info("crm sbd configure show sysconfig") for line in content_list: print(line) @@ -179,6 +181,8 @@ def _show_disk_metadata(self) -> None: ''' Show sbd disk metadata for each configured device ''' + if self.device_list_from_config: + logger.info("crm sbd configure show disk_metadata") for dev in self.device_list_from_config: print(self.cluster_shell.get_stdout_or_raise_error(f"sbd -d {dev} dump")) print() @@ -196,11 +200,14 @@ def _show_property(self) -> None: cmd = f"CIB_file={cib_path} crm configure show" out = self.cluster_shell.get_stdout_or_raise_error(cmd) + logger.info("crm sbd configure show property") regex = 
f"({'|'.join(self.PCMK_ATTRS)})=([^\s]+)" matches = re.findall(regex, out) for match in matches: print(f"{match[0]}={match[1]}") + print() + logger.info("systemctl show -p TimeoutStartUSec sbd --value") systemd_start_timeout = sbd.SBDTimeout.get_sbd_systemd_start_timeout() print(f"TimeoutStartUSec={systemd_start_timeout}") From dc76bda78a2c3bb0837978f4bd7979cc80cb8ce7 Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 11 Sep 2024 10:16:10 +0800 Subject: [PATCH 06/36] Dev: ui_sbd: No need to consider static case when calling crm configure show After PR#1540 got merged, crmsh will load CIB_file env before calling above readonly command. --- crmsh/ui_sbd.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 5d6075d2a7..2b3480c2bd 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -191,14 +191,7 @@ def _show_property(self) -> None: ''' Show sbd-related properties from cluster and systemd ''' - if self.service_manager.service_is_active(constants.PCMK_SERVICE): - cmd = "crm configure show" - else: # static case - cib_path = os.getenv("CIB_file", constants.CIB_RAW_FILE) - if not os.path.exists(cib_path): - return - cmd = f"CIB_file={cib_path} crm configure show" - out = self.cluster_shell.get_stdout_or_raise_error(cmd) + out = self.cluster_shell.get_stdout_or_raise_error("crm configure show") logger.info("crm sbd configure show property") regex = f"({'|'.join(self.PCMK_ATTRS)})=([^\s]+)" From 4681854c2e12793493db34e14c11b39b1d2396b4 Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 11 Sep 2024 14:31:08 +0800 Subject: [PATCH 07/36] Dev: ui_sbd: Catch both stderr and stdout for crm resource status since crm_resource command will direct message to stderr when resource is not running --- crmsh/ui_sbd.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 2b3480c2bd..78ff538cec 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -477,5 +477,8 @@ 
def do_status(self, context) -> bool: print("fence_sbd status: ") sbd_id_list = self.crm_mon_xml_parser.get_resource_id_list_via_type(sbd.SBDManager.SBD_RA) for sbd_id in sbd_id_list: - out = self.cluster_shell.get_stdout_or_raise_error(f"crm resource status {sbd_id}") - print(out) + rc, out, err = self.cluster_shell.get_rc_stdout_stderr_without_input(None, f"crm resource status {sbd_id}") + if out: + print(out) + if err: + print(err) From b9242ccb9687ae7f73a634f0fef2661702728473 Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 11 Sep 2024 15:14:23 +0800 Subject: [PATCH 08/36] Dev: ui_sbd: Update regex for parsing SBD device by partlabel --- crmsh/ui_sbd.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 78ff538cec..da7f912fd7 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -95,13 +95,15 @@ class SBD(command.UI): "priority-fencing-delay", "pcmk_delay_max" ) + # a common character class for matching device path + dev_char_class = r'[\w/\d;\-:.]' PARSE_RE = re.compile( # Match "device" key with any value, including empty - r'(device)=("[^"]*"|[\w/\d;]*)' + fr'(device)=("[^"]*"|{dev_char_class}*)' # Match other keys with non-empty values, capturing possible suffix r'|(\w+)(?:-(\w+))?=("[^"]+"|[\w/\d;]+)' # Match standalone device path - r'|(/dev/[\w\d]+)' + fr'|(/dev/{dev_char_class}+)' ) class SyntaxError(Exception): From 7fd5d9644e4690499d62067c5a2aeed12598b91b Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 11 Sep 2024 15:33:44 +0800 Subject: [PATCH 09/36] Dev: ui_sbd: Clean up existing fence_sbd resource before configure diskless SBD --- crmsh/ui_sbd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index da7f912fd7..87e4982d53 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -359,6 +359,7 @@ def _configure_diskless(self, parameter_dict: dict): if self.device_list_from_config: self.watchdog_timeout_from_config = None 
self.watchdog_device_from_config = None + sbd.clean_up_existing_sbd_resource() update_dict = {} parameter_dict = self._adjust_timeout_dict(parameter_dict, diskless=True) From b3383a9c7ec0ae586b53afbb155578416236dfa8 Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 11 Sep 2024 22:51:13 +0800 Subject: [PATCH 10/36] Dev: ui_sbd: Minor changes to the code --- crmsh/constants.py | 2 ++ crmsh/sbd.py | 3 +-- crmsh/ui_sbd.py | 17 +++++++++-------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/crmsh/constants.py b/crmsh/constants.py index 89686aa83d..0106e29bea 100644 --- a/crmsh/constants.py +++ b/crmsh/constants.py @@ -453,4 +453,6 @@ PCMK_SERVICE = "pacemaker.service" SBD_SERVICE = "sbd.service" + +SHOW_SBD_START_TIMEOUT_CMD = "systemctl show -p TimeoutStartUSec sbd.service --value" # vim:ts=4:sw=4:et: diff --git a/crmsh/sbd.py b/crmsh/sbd.py index 7e5bb7f626..ced58fccf7 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -324,8 +324,7 @@ def is_sbd_delay_start(): @staticmethod def get_sbd_systemd_start_timeout() -> int: - cmd = "systemctl show -p TimeoutStartUSec sbd --value" - out = sh.cluster_shell().get_stdout_or_raise_error(cmd) + out = sh.cluster_shell().get_stdout_or_raise_error(constants.SHOW_SBD_START_TIMEOUT_CMD) return utils.get_systemd_timeout_start_in_sec(out) def adjust_systemd_start_timeout(self): diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 87e4982d53..0051b62788 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -128,7 +128,7 @@ def _load_attributes(self): self.device_meta_dict_runtime = sbd.SBDUtils.get_sbd_device_metadata(self.device_list_from_config[0], timeout_only=True) try: self.watchdog_timeout_from_config = sbd.SBDTimeout.get_sbd_watchdog_timeout() - except: + except Exception: self.watchdog_timeout_from_config = None self.watchdog_device_from_config = watchdog.Watchdog.get_watchdog_device_from_sbd_config() @@ -152,7 +152,7 @@ def configure_usage(self) -> str: Build usage string for sbd configure command, including 
disk-based and diskless sbd cases ''' - def build_timeout_usage_str(timeout_types: tuple[str]) -> str: + def build_timeout_usage_str(timeout_types: tuple[str, ...]) -> str: return " ".join([f"[{t}-timeout=]" for t in timeout_types]) timeout_usage_str = build_timeout_usage_str(self.TIMEOUT_TYPES) timeout_usage_str_diskless = build_timeout_usage_str(self.DISKLESS_TIMEOUT_TYPES) @@ -196,13 +196,13 @@ def _show_property(self) -> None: out = self.cluster_shell.get_stdout_or_raise_error("crm configure show") logger.info("crm sbd configure show property") - regex = f"({'|'.join(self.PCMK_ATTRS)})=([^\s]+)" + regex = f"({'|'.join(self.PCMK_ATTRS)})=(\\S+)" matches = re.findall(regex, out) for match in matches: print(f"{match[0]}={match[1]}") print() - logger.info("systemctl show -p TimeoutStartUSec sbd --value") + logger.info('%s', constants.SHOW_SBD_START_TIMEOUT_CMD) systemd_start_timeout = sbd.SBDTimeout.get_sbd_systemd_start_timeout() print(f"TimeoutStartUSec={systemd_start_timeout}") @@ -227,7 +227,7 @@ def _configure_show(self, args) -> None: print() self._show_property() - def _parse_args(self, args: typing.List[str]) -> dict[str, int|str|list[str]]: + def _parse_args(self, args: tuple[str, ...]) -> dict[str, int|str|list[str]]: ''' Parse arguments and verify them @@ -399,13 +399,14 @@ def do_configure(self, context, *args) -> bool: parameter_dict = self._parse_args(args) # disk-based sbd case if "device-list" in parameter_dict: - return self._configure_diskbase(parameter_dict) + self._configure_diskbase(parameter_dict) # diskless sbd case else: - return self._configure_diskless(parameter_dict) + self._configure_diskless(parameter_dict) + return True except self.SyntaxError as e: - logger.error(str(e)) + logger.error('%s', e) print(self.configure_usage) return False From 3294086d38fde495ab1ef1427c1f7c43b09535c2 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 13 Sep 2024 09:41:45 +0800 Subject: [PATCH 11/36] Dev: bootstrap: Check if sbd package is installed in 
the right place Changes: - Check at the beginning of the bootstrap process - Check at configure sbd stage in the interactive mode - Put the sbd not installed message in the constants.py --- crmsh/bootstrap.py | 9 ++++++++- crmsh/sbd.py | 24 +++++++++--------------- crmsh/ui_sbd.py | 4 ++-- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index d8101688f7..293a156cb9 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -212,12 +212,19 @@ def _validate_sbd_option(self): Validate sbd options """ from .sbd import SBDUtils + with_sbd_option = self.sbd_devices or self.diskless_sbd + sbd_installed = utils.package_is_installed("sbd") + + if with_sbd_option and not sbd_installed: + utils.fatal(SBDManager.SBD_NOT_INSTALLED_MSG) if self.sbd_devices and self.diskless_sbd: utils.fatal("Can't use -s and -S options together") if self.sbd_devices: SBDUtils.verify_sbd_device(self.sbd_devices) if self.stage == "sbd": - if not self.sbd_devices and not self.diskless_sbd and self.yes_to_all: + if not sbd_installed: + utils.fatal(SBDManager.SBD_NOT_INSTALLED_MSG) + if not with_sbd_option and self.yes_to_all: utils.fatal("Stage sbd should specify sbd device by -s or diskless sbd by -S option") if ServiceManager().service_is_active(constants.SBD_SERVICE) and not config.core.force: utils.fatal("Can't configure stage sbd: sbd.service already running! Please use crm option '-F' if need to redeploy") diff --git a/crmsh/sbd.py b/crmsh/sbd.py index ced58fccf7..0a927ce0e9 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -390,6 +390,7 @@ class SBDManager: NO_SBD_WARNING = "Not configuring SBD - STONITH will be disabled." DISKLESS_SBD_MIN_EXPECTED_VOTE = 3 DISKLESS_SBD_WARNING = "Diskless SBD requires cluster with three or more nodes. If you want to use diskless SBD for 2-node cluster, should be combined with QDevice." + SBD_NOT_INSTALLED_MSG = "Package sbd is not installed." 
SBD_RA = "stonith:fence_sbd" SBD_RA_ID = "stonith-sbd" SBD_DEVICE_MAX = 3 @@ -407,10 +408,6 @@ def __init__( ''' Init function which can be called from crm sbd subcommand or bootstrap ''' - self.package_installed = utils.package_is_installed("sbd") - if not self.package_installed: - return - self.device_list_to_init = device_list_to_init or [] self.timeout_dict = timeout_dict or {} self.update_dict = update_dict or {} @@ -530,23 +527,26 @@ def _warn_diskless_sbd(self, peer=None): vote_dict = utils.get_quorum_votes_dict(peer) expected_vote = int(vote_dict.get('Expected', 0)) if expected_vote < self.DISKLESS_SBD_MIN_EXPECTED_VOTE: - logger.warning(self.DISKLESS_SBD_WARNING) + logger.warning('%s', self.DISKLESS_SBD_WARNING) # When in init process elif self.diskless_sbd: - logger.warning(self.DISKLESS_SBD_WARNING) + logger.warning('%s', self.DISKLESS_SBD_WARNING) def get_sbd_device_interactive(self): ''' Get sbd device on interactive mode ''' if self.bootstrap_context.yes_to_all: - logger.warning(self.NO_SBD_WARNING) + logger.warning('%s', self.NO_SBD_WARNING) return logger.info(self.SBD_STATUS_DESCRIPTION) if not bootstrap.confirm("Do you wish to use SBD?"): - logger.warning(self.NO_SBD_WARNING) + logger.warning('%s', self.NO_SBD_WARNING) return + if not utils.package_is_installed("sbd"): + utils.fatal(self.SBD_NOT_INSTALLED_MSG) + configured_devices = SBDUtils.get_sbd_device_from_config() for dev in configured_devices: self.no_overwrite_dev_map[dev] = SBDUtils.no_overwrite_device_check(dev) @@ -565,7 +565,7 @@ def get_sbd_device_interactive(self): try: SBDUtils.verify_sbd_device(dev_list) except ValueError as e: - logger.error(e) + logger.error('%s', e) continue for dev in dev_list: if dev not in self.no_overwrite_dev_map: @@ -605,9 +605,6 @@ def init_and_deploy_sbd(self): 4. Restart cluster service if possible 5. 
Configure stonith-sbd resource and related properties ''' - if not self.package_installed: - return - if self.bootstrap_context: self.get_sbd_device_from_bootstrap() if not self.device_list_to_init and not self.diskless_sbd: @@ -630,9 +627,6 @@ def join_sbd(self, remote_user, peer_host): On joining process, check whether peer node has enabled sbd.service If so, check prerequisites of SBD and verify sbd device on join node ''' - if not self.package_installed: - return - service_manager = ServiceManager() if not os.path.exists(SYSCONFIG_SBD) or not service_manager.service_is_enabled(constants.SBD_SERVICE, peer_host): service_manager.disable_service(constants.SBD_SERVICE) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 0051b62788..42a3f23d54 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -142,7 +142,7 @@ def requires(self) -> bool: Requirements check when entering sbd sub-level ''' if not utils.package_is_installed("sbd"): - logger.error("sbd is not installed") + logger.error('%s', sbd.SBDManager.SBD_NOT_INSTALLED_MSG) return False return True @@ -442,7 +442,7 @@ def do_remove(self, context, *args) -> bool: else: sbd.disable_sbd_from_cluster() - logger.info(self.RESTART_INFO) + logger.info('%s', self.RESTART_INFO) return True def do_status(self, context) -> bool: From 588022b9804606f0acc83a7f7c1e906433ed6521 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 13 Sep 2024 16:12:11 +0800 Subject: [PATCH 12/36] Dev: ui_sbd: Refactor do_status method - Split the method into smaller methods - Enhance the readability of output - Print the type of SBD --- crmsh/ui_sbd.py | 61 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 42a3f23d54..d8738850ce 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -445,13 +445,23 @@ def do_remove(self, context, *args) -> bool: logger.info('%s', self.RESTART_INFO) return True - def do_status(self, context) -> bool: - ''' - 
Implement sbd status command - ''' - self._load_attributes() + def _print_sbd_type(self): + if not self.service_manager.service_is_active(constants.SBD_SERVICE): + return + print("# Type of SBD:") + if self.device_list_from_config: + print("Disk-based SBD configured") + else: + print("Diskless SBD configured") + print() + + def _print_sbd_status(self): + padding = 2 + status_len = 8 + max_node_len = max(len(node) for node in self.cluster_nodes) + padding - print(f"{constants.SBD_SERVICE} status: (active|enabled|since)") + print(f"# Status of {constants.SBD_SERVICE}:") + print(f"{'Node':<{max_node_len}}|{'Active':<{status_len}}|{'Enabled':<{status_len}}|Since") for node in self.cluster_nodes: is_active = self.service_manager.service_is_active(constants.SBD_SERVICE, node) is_active_str = "YES" if is_active else "NO" @@ -461,24 +471,42 @@ def do_status(self, context) -> bool: since_str_prefix = "active since" if is_active else "disactive since" systemctl_show_cmd = f"systemctl show {constants.SBD_SERVICE} --property={systemd_property} --value" since = self.cluster_shell.get_stdout_or_raise_error(systemctl_show_cmd, node) or "N/A" - print(f"{node}: {is_active_str:<4}|{is_enabled_str:<4}|{since_str_prefix}: {since}") + print(f"{node:<{max_node_len}}|{is_active_str:<{status_len}}|{is_enabled_str:<{status_len}}|{since_str_prefix}: {since}") print() - print("watchdog info: (device|driver|kernel timeout)") + def _print_watchdog_info(self): + padding = 2 + max_node_len = max(len(node) for node in self.cluster_nodes) + padding + watchdog_sbd_re = "\[[0-9]+\] (/dev/.*)\nIdentity: Busy: .*sbd.*\nDriver: (.*)" - for node in self.cluster_nodes: + device_list, driver_list, kernel_timeout_list = [], [], [] + cluster_nodes = self.cluster_nodes[:] + for node in cluster_nodes[:]: out = self.cluster_shell.get_stdout_or_raise_error("sbd query-watchdog", node) res = re.search(watchdog_sbd_re, out) if res: device, driver = res.groups() kernel_timeout = 
self.cluster_shell.get_stdout_or_raise_error("cat /proc/sys/kernel/watchdog_thresh", node) - print(f"{node}: {device}|{driver}|{kernel_timeout}") + device_list.append(device) + driver_list.append(driver) + kernel_timeout_list.append(kernel_timeout) else: logger.error("Failed to get watchdog info from %s", node) + cluster_nodes.remove(node) + if not cluster_nodes: + return + + print("# Watchdog info:") + max_dev_len = max(len(dev) for dev in device_list) + padding + max_driver_len = max(len(driver) for driver in driver_list) + padding + print(f"{'Node':<{max_node_len}}|{'Device':<{max_dev_len}}|{'Driver':<{max_driver_len}}|Kernel Timeout") + for i, node in enumerate(cluster_nodes): + print(f"{node:<{max_node_len}}|{device_list[i]:<{max_dev_len}}|{driver_list[i]:<{max_driver_len}}|{kernel_timeout_list[i]}") print() + def _print_sbd_agent_status(self): if self.crm_mon_xml_parser.is_resource_configured(sbd.SBDManager.SBD_RA): - print("fence_sbd status: ") + print("# Status of fence_sbd:") sbd_id_list = self.crm_mon_xml_parser.get_resource_id_list_via_type(sbd.SBDManager.SBD_RA) for sbd_id in sbd_id_list: rc, out, err = self.cluster_shell.get_rc_stdout_stderr_without_input(None, f"crm resource status {sbd_id}") @@ -486,3 +514,14 @@ def do_status(self, context) -> bool: print(out) if err: print(err) + + def do_status(self, context) -> bool: + ''' + Implement sbd status command + ''' + self._load_attributes() + self._print_sbd_type() + self._print_sbd_status() + self._print_watchdog_info() + self._print_sbd_agent_status() + return True From 96d9d9a4c2ff26d6a4f0e1bcf72499c85eb4ae7f Mon Sep 17 00:00:00 2001 From: xin liang Date: Thu, 19 Sep 2024 10:30:10 +0800 Subject: [PATCH 13/36] Dev: Refactor the code to avoid circular import --- crmsh/bootstrap.py | 18 ++++++---------- crmsh/qdevice.py | 2 +- crmsh/sbd.py | 25 +++++++++++----------- crmsh/ui_sbd.py | 13 +++++------ crmsh/watchdog.py | 14 ++++++------ test/features/steps/step_implementation.py | 3 ++- 6 files changed, 
37 insertions(+), 38 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 293a156cb9..532eeceb85 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -43,6 +43,8 @@ from .sh import ShellUtils from .ui_node import NodeMgmt from .user_of_host import UserOfHost, UserNotFoundError +from .sbd import SBDUtils, SBDManager, SBDTimeout +from . import watchdog import crmsh.healthcheck @@ -55,21 +57,18 @@ COROSYNC_AUTH = "/etc/corosync/authkey" CRM_CFG = "/etc/crm/crm.conf" PROFILES_FILE = "/etc/crm/profiles.yml" -SYSCONFIG_SBD = "/etc/sysconfig/sbd" SYSCONFIG_PCMK = "/etc/sysconfig/pacemaker" SYSCONFIG_NFS = "/etc/sysconfig/nfs" PCMK_REMOTE_AUTH = "/etc/pacemaker/authkey" COROSYNC_CONF_ORIG = tmpfiles.create()[1] SERVICES_STOP_LIST = ["corosync-qdevice.service", "corosync.service", "hawk.service", CSYNC2_SERVICE] -WATCHDOG_CFG = "/etc/modules-load.d/watchdog.conf" BOOTH_DIR = "/etc/booth" BOOTH_CFG = "/etc/booth/booth.conf" BOOTH_AUTH = "/etc/booth/authkey" -SBD_SYSTEMD_DELAY_START_DIR = "/etc/systemd/system/sbd.service.d" FILES_TO_SYNC = (BOOTH_DIR, corosync.conf(), COROSYNC_AUTH, CSYNC2_CFG, CSYNC2_KEY, "/etc/ctdb/nodes", "/etc/drbd.conf", "/etc/drbd.d", "/etc/ha.d/ldirectord.cf", "/etc/lvm/lvm.conf", "/etc/multipath.conf", - "/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SYSCONFIG_SBD, PCMK_REMOTE_AUTH, WATCHDOG_CFG, - PROFILES_FILE, CRM_CFG, SBD_SYSTEMD_DELAY_START_DIR) + "/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SBDManager.SYSCONFIG_SBD, PCMK_REMOTE_AUTH, watchdog.Watchdog.WATCHDOG_CFG, + PROFILES_FILE, CRM_CFG, SBDManager.SBD_SYSTEMD_DELAY_START_DIR) INIT_STAGES_EXTERNAL = ("ssh", "csync2", "corosync", "sbd", "cluster", "admin", "qdevice") INIT_STAGES_INTERNAL = ("csync2_remote", "qnetd_remote") @@ -132,7 +131,7 @@ def __init__(self): self.profiles_dict = {} self.default_nic = None self.default_ip_list = [] - self.rm_list = [SYSCONFIG_SBD, CSYNC2_CFG, corosync.conf(), CSYNC2_KEY, + self.rm_list = [SBDManager.SYSCONFIG_SBD, 
CSYNC2_CFG, corosync.conf(), CSYNC2_KEY, COROSYNC_AUTH, "/var/lib/heartbeat/crm/*", "/var/lib/pacemaker/cib/*", "/var/lib/corosync/*", "/var/lib/pacemaker/pengine/*", PCMK_REMOTE_AUTH, "/var/lib/csync2/*", "~/.config/crm/*"] @@ -211,7 +210,6 @@ def _validate_sbd_option(self): """ Validate sbd options """ - from .sbd import SBDUtils with_sbd_option = self.sbd_devices or self.diskless_sbd sbd_installed = utils.package_is_installed("sbd") @@ -301,7 +299,6 @@ def validate_option(self): self._validate_sbd_option() def init_sbd_manager(self): - from .sbd import SBDManager self.sbd_manager = SBDManager(bootstrap_context=self) def detect_platform(self): @@ -773,7 +770,6 @@ def start_pacemaker(node_list=[], enable_flag=False): Return success node list """ - from .sbd import SBDTimeout # not _context means not in init or join process if not _context and \ utils.package_is_installed("sbd") and \ @@ -2057,8 +2053,7 @@ def rm_configuration_files(remote=None): shell.get_stdout_or_raise_error("rm -f {}".format(' '.join(_context.rm_list)), remote) # restore original sbd configuration file from /usr/share/fillup-templates/sysconfig.sbd if utils.package_is_installed("sbd", remote_addr=remote): - from .sbd import SBDManager - cmd = "cp {} {}".format(SBDManager.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD) + cmd = "cp {} {}".format(SBDManager.SYSCONFIG_SBD_TEMPLATE, SBDManager.SYSCONFIG_SBD) shell.get_stdout_or_raise_error(cmd, remote) @@ -2682,7 +2677,6 @@ def adjust_stonith_timeout(): Adjust stonith-timeout for sbd and other scenarios """ if ServiceManager().service_is_active(constants.SBD_SERVICE): - from .sbd import SBDTimeout SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() else: value = get_stonith_timeout_generally_expected() diff --git a/crmsh/qdevice.py b/crmsh/qdevice.py index 1f63022214..6f569b5181 100644 --- a/crmsh/qdevice.py +++ b/crmsh/qdevice.py @@ -15,6 +15,7 @@ from . import lock from . 
import log from .service_manager import ServiceManager +from .sbd import SBDManager, SBDTimeout, SBDUtils logger = log.setup_logger(__name__) @@ -612,7 +613,6 @@ def adjust_sbd_watchdog_timeout_with_qdevice(self): """ Adjust SBD_WATCHDOG_TIMEOUT when configuring qdevice and diskless SBD """ - from .sbd import SBDManager, SBDTimeout, SBDUtils utils.check_all_nodes_reachable() self.using_diskless_sbd = SBDUtils.is_using_diskless_sbd() # add qdevice after diskless sbd started diff --git a/crmsh/sbd.py b/crmsh/sbd.py index 0a927ce0e9..c4eff0d7d2 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -3,7 +3,6 @@ import typing from . import utils, sh from . import bootstrap -from .bootstrap import SYSCONFIG_SBD, SBD_SYSTEMD_DELAY_START_DIR from . import log from . import constants from . import corosync @@ -77,7 +76,7 @@ def get_sbd_value_from_config(key): ''' Get value from /etc/sysconfig/sbd ''' - return utils.parse_sysconfig(SYSCONFIG_SBD).get(key) + return utils.parse_sysconfig(SBDManager.SYSCONFIG_SBD).get(key) @staticmethod def get_sbd_device_from_config(): @@ -339,10 +338,10 @@ def adjust_systemd_start_timeout(self): if start_timeout > int(sbd_delay_start_value): return - utils.mkdirp(SBD_SYSTEMD_DELAY_START_DIR) - sbd_delay_start_file = "{}/sbd_delay_start.conf".format(SBD_SYSTEMD_DELAY_START_DIR) + utils.mkdirp(SBDManager.SBD_SYSTEMD_DELAY_START_DIR) + sbd_delay_start_file = "{}/sbd_delay_start.conf".format(SBDManager.SBD_SYSTEMD_DELAY_START_DIR) utils.str2file("[Service]\nTimeoutSec={}".format(int(1.2*int(sbd_delay_start_value))), sbd_delay_start_file) - bootstrap.sync_file(SBD_SYSTEMD_DELAY_START_DIR) + bootstrap.sync_file(SBDManager.SBD_SYSTEMD_DELAY_START_DIR) utils.cluster_run_cmd("systemctl daemon-reload") def adjust_stonith_timeout(self): @@ -377,7 +376,9 @@ def adjust_sbd_timeout_related_cluster_configuration(cls): class SBDManager: + SYSCONFIG_SBD = "/etc/sysconfig/sbd" SYSCONFIG_SBD_TEMPLATE = "/usr/share/fillup-templates/sysconfig.sbd" + 
SBD_SYSTEMD_DELAY_START_DIR = "/etc/systemd/system/sbd.service.d" SBD_STATUS_DESCRIPTION = '''Configure SBD: If you have shared storage, for example a SAN or iSCSI target, you can use it avoid split-brain scenarios by configuring SBD. @@ -403,7 +404,7 @@ def __init__( no_overwrite_dev_map: typing.Dict[str, bool] | None = None, new_config: bool = False, diskless_sbd: bool = False, - bootstrap_context: bootstrap.Context | None = None + bootstrap_context: 'bootstrap.Context | None' = None ): ''' Init function which can be called from crm sbd subcommand or bootstrap @@ -452,13 +453,13 @@ def update_configuration(self) -> None: if not self.update_dict: return if (self.bootstrap_context and self.bootstrap_context.type == "init") or self.new_config: - utils.copy_local_file(self.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD) + utils.copy_local_file(self.SYSCONFIG_SBD_TEMPLATE, self.SYSCONFIG_SBD) for key, value in self.update_dict.items(): - logger.info("Update %s in %s: %s", key, SYSCONFIG_SBD, value) - utils.sysconfig_set(SYSCONFIG_SBD, **self.update_dict) - bootstrap.sync_file(SYSCONFIG_SBD) - logger.info("Already synced %s to all nodes", SYSCONFIG_SBD) + logger.info("Update %s in %s: %s", key, self.SYSCONFIG_SBD, value) + utils.sysconfig_set(self.SYSCONFIG_SBD, **self.update_dict) + bootstrap.sync_file(self.SYSCONFIG_SBD) + logger.info("Already synced %s to all nodes", self.SYSCONFIG_SBD) @classmethod def update_sbd_configuration(cls, update_dict: typing.Dict[str, str]) -> None: @@ -628,7 +629,7 @@ def join_sbd(self, remote_user, peer_host): If so, check prerequisites of SBD and verify sbd device on join node ''' service_manager = ServiceManager() - if not os.path.exists(SYSCONFIG_SBD) or not service_manager.service_is_enabled(constants.SBD_SERVICE, peer_host): + if not os.path.exists(self.SYSCONFIG_SBD) or not service_manager.service_is_enabled(constants.SBD_SERVICE, peer_host): service_manager.disable_service(constants.SBD_SERVICE) return diff --git a/crmsh/ui_sbd.py 
b/crmsh/ui_sbd.py index d8738850ce..2db849a87e 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -13,7 +13,6 @@ from crmsh import xmlutil from crmsh import constants from crmsh.service_manager import ServiceManager -from crmsh.bootstrap import SYSCONFIG_SBD logger = logging.getLogger(__name__) @@ -170,10 +169,12 @@ def _show_sysconfig() -> None: ''' Show pure content of /etc/sysconfig/sbd ''' - with open(SYSCONFIG_SBD) as f: - content_list = [line.strip() for line in f.readlines() - if not line.startswith("#") - and line.strip()] + with open(sbd.SBDManager.SYSCONFIG_SBD) as f: + content_list = [ + line.strip() + for line in f.readlines() + if not line.startswith("#") and line.strip() + ] if content_list: logger.info("crm sbd configure show sysconfig") for line in content_list: @@ -434,7 +435,7 @@ def do_remove(self, context, *args) -> bool: changed_dev_list = set(self.device_list_from_config) - set(dev_list) # remove part of devices from config if changed_dev_list: - logger.info("Remove '%s' from %s", ";".join(dev_list), SYSCONFIG_SBD) + logger.info("Remove '%s' from %s", ";".join(dev_list), sbd.SBDManager.SYSCONFIG_SBD) sbd.SBDManager.update_sbd_configuration({"SBD_DEVICE": ";".join(changed_dev_list)}) # remove all devices, equivalent to stop sbd.service else: diff --git a/crmsh/watchdog.py b/crmsh/watchdog.py index 91a76a6103..cf21847eec 100644 --- a/crmsh/watchdog.py +++ b/crmsh/watchdog.py @@ -1,14 +1,15 @@ import re from . import utils from .constants import SSH_OPTION -from .bootstrap import invoke, invokerc, WATCHDOG_CFG, SYSCONFIG_SBD from .sh import ShellUtils +from . 
import sbd class Watchdog(object): """ Class to find valid watchdog device name """ + WATCHDOG_CFG = "/etc/modules-load.d/watchdog.conf" QUERY_CMD = "sudo sbd query-watchdog" DEVICE_FIND_REGREX = "\\[[0-9]+\\] (/dev/.*)\n.*\nDriver: (.*)" @@ -44,15 +45,15 @@ def _load_watchdog_driver(driver): """ Load specific watchdog driver """ - invoke("echo {} > {}".format(driver, WATCHDOG_CFG)) - invoke("systemctl restart systemd-modules-load") + ShellUtils().get_stdout_stderr(f"echo {driver} > {Watchdog.WATCHDOG_CFG}") + ShellUtils().get_stdout_stderr("systemctl restart systemd-modules-load") @staticmethod def get_watchdog_device_from_sbd_config(): """ Try to get watchdog device name from sbd config file """ - conf = utils.parse_sysconfig(SYSCONFIG_SBD) + conf = utils.parse_sysconfig(sbd.SBDManager.SYSCONFIG_SBD) return conf.get("SBD_WATCHDOG_DEV") @staticmethod @@ -144,7 +145,7 @@ def join_watchdog(self): res = self.get_watchdog_device_from_sbd_config() if not res: - utils.fatal("Failed to get watchdog device from {}".format(SYSCONFIG_SBD)) + utils.fatal("Failed to get watchdog device from {}".format(sbd.SBDManager.SYSCONFIG_SBD)) self._input = res if not self._valid_device(self._input): @@ -164,7 +165,8 @@ def init_watchdog(self): return # self._input is invalid, exit - if not invokerc("modinfo {}".format(self._input)): + rc, _, _ = ShellUtils().get_stdout_stderr(f"modinfo {self._input}") + if rc != 0: utils.fatal("Should provide valid watchdog device or driver name by -w option") # self._input is a driver name, load it if it was unloaded diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py index 47b1383930..b87b52c31e 100644 --- a/test/features/steps/step_implementation.py +++ b/test/features/steps/step_implementation.py @@ -7,8 +7,9 @@ import behave from behave import given, when, then import behave_agent -from crmsh import corosync, sbd, userdir, bootstrap +from crmsh import corosync, userdir, bootstrap from crmsh import 
utils as crmutils +from crmsh import sbd from crmsh.sh import ShellUtils from utils import check_cluster_state, check_service_state, online, run_command, me, \ run_command_local_or_remote, file_in_archive, \ From 35685f2910704aec8228f9d9cc56709ce4087b89 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 20 Sep 2024 09:49:29 +0800 Subject: [PATCH 14/36] Dev: report: Dump output of 'crm sbd configure show' and 'crm sbd status' to the report result --- crmsh/report/collect.py | 13 +++++++++---- crmsh/report/utils.py | 2 +- crmsh/utils.py | 8 ++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/crmsh/report/collect.py b/crmsh/report/collect.py index 17823b4897..8d05b98ce2 100644 --- a/crmsh/report/collect.py +++ b/crmsh/report/collect.py @@ -378,11 +378,16 @@ def collect_sbd_info(context: core.Context) -> None: return sbd_f = os.path.join(context.work_dir, constants.SBD_F) - cmd = ". {};export SBD_DEVICE;{};{}".format(constants.SBDCONF, "sbd dump", "sbd list") + cmd_list = [ + f". 
{constants.SBDCONF};export SBD_DEVICE;sbd dump;sbd list", + "crm sbd configure show", + "crm sbd status" + ] with open(sbd_f, "w") as f: - f.write("\n\n#=====[ Command ] ==========================#\n") - f.write(f"# {cmd}\n") - f.write(utils.get_cmd_output(cmd)) + for cmd in cmd_list: + f.write("\n\n#=====[ Command ] ==========================#\n") + f.write(f"# {cmd}\n") + f.write(utils.get_cmd_output(cmd)) logger.debug(f"Dump SBD config file into {utils.real_path(sbd_f)}") diff --git a/crmsh/report/utils.py b/crmsh/report/utils.py index 4fadf307ff..b6ffe02ca3 100644 --- a/crmsh/report/utils.py +++ b/crmsh/report/utils.py @@ -758,7 +758,7 @@ def get_cmd_output(cmd: str, timeout: int = None) -> str: out_str += f"{out}\n" if err: out_str += f"{err}\n" - return out_str + return crmutils.strip_ansi_escape_sequences(out_str) def get_timespan_str(context: core.Context) -> str: diff --git a/crmsh/utils.py b/crmsh/utils.py index bad90364d2..6a253c0982 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -3212,4 +3212,12 @@ def is_subdict(sub_dict, main_dict): Check if sub_dict is a sub-dictionary of main_dict """ return all(item in main_dict.items() for item in sub_dict.items()) + + +def strip_ansi_escape_sequences(text): + """ + Remove ANSI escape sequences from text + """ + ansi_escape_pattern = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') + return ansi_escape_pattern.sub('', text) # vim:ts=4:sw=4:et: From 6b5d7ebacfa2bcee5dc43a84a84a9c22cf342796 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 20 Sep 2024 10:26:09 +0800 Subject: [PATCH 15/36] Dev: ui_sbd: No need to specify device="" when trying to modify properties under diskless sbd --- crmsh/ui_sbd.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 2db849a87e..f86c9a5dca 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -270,6 +270,12 @@ def _parse_args(self, args: tuple[str, ...]) -> dict[str, int|str|list[str]]: watchdog_device = parameter_dict.get("watchdog-device") 
parameter_dict["watchdog-device"] = watchdog.Watchdog.get_watchdog_device(watchdog_device) + # No need to specify device="" when trying to modify properties under diskless sbd + if sbd.SBDUtils.is_using_diskless_sbd() \ + and "device-list" in parameter_dict \ + and not parameter_dict["device-list"]: + parameter_dict.pop("device-list") + logger.debug("Parsed arguments: %s", parameter_dict) return parameter_dict From a2dc21aefaa2e12387ddd469f574cde1c77f8fd0 Mon Sep 17 00:00:00 2001 From: xin liang Date: Mon, 30 Sep 2024 14:41:52 +0800 Subject: [PATCH 16/36] Dev: ui_sbd: Add sbd device sub command --- crmsh/sbd.py | 16 +++++++- crmsh/ui_sbd.py | 100 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 100 insertions(+), 16 deletions(-) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index c4eff0d7d2..e8e462a1a6 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -86,13 +86,25 @@ def get_sbd_device_from_config(): res = SBDUtils.get_sbd_value_from_config("SBD_DEVICE") return res.split(';') if res else [] + @staticmethod + def _is_sbd_running_with_device(): + if not ServiceManager().service_is_active(constants.SBD_SERVICE): + return False + return bool(SBDUtils.get_sbd_device_from_config()) + @staticmethod def is_using_diskless_sbd(): ''' Check if using diskless SBD ''' - dev_list = SBDUtils.get_sbd_device_from_config() - return not dev_list and ServiceManager().service_is_active(constants.SBD_SERVICE) + return not SBDUtils._is_sbd_running_with_device() + + @staticmethod + def is_using_disk_based_sbd(): + ''' + Check if using disk-based SBD + ''' + return SBDUtils._is_sbd_running_with_device() @staticmethod def has_sbd_device_already_initialized(dev) -> bool: diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index f86c9a5dca..ff8e90e5f6 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -18,16 +18,24 @@ logger = logging.getLogger(__name__) -def sbd_devices_completer(completed_list: typing.List[str]) -> typing.List[str]: +def sbd_device_completer(completed_list: 
typing.List[str]) -> typing.List[str]: ''' - completion for sbd devices + Completion for sbd device command ''' - if not ServiceManager().service_is_active(constants.SBD_SERVICE): + if not sbd.SBDUtils.is_using_disk_based_sbd(): return [] + if len(completed_list) == 2: + return ["add", "remove"] + if len(completed_list) > 2 and completed_list[1] != "remove": + return [] + + # completer for sbd device remove dev_list = sbd.SBDUtils.get_sbd_device_from_config() - if dev_list: - return [dev for dev in dev_list if dev not in completed_list] - return [] + not_complete_list = [dev for dev in dev_list if dev not in completed_list[2:]] + # not allow to remove the last device + if len(not_complete_list) == 1: + return [] + return not_complete_list def sbd_configure_completer(completed_list: typing.List[str]) -> typing.List[str]: @@ -51,8 +59,6 @@ def sbd_configure_completer(completed_list: typing.List[str]) -> typing.List[str return [t for t in show_types if t not in completed_list] else: return [] - if completed_list[-1] == "device=": - return [] timeout_types = SBD.TIMEOUT_TYPES if is_diskbased else SBD.DISKLESS_TIMEOUT_TYPES parameters_pool.extend([f"{t}-timeout=" for t in timeout_types]) @@ -63,11 +69,6 @@ def sbd_configure_completer(completed_list: typing.List[str]) -> typing.List[str if not any(c.startswith(p) for c in completed_list) ] - if is_diskbased: - dev_count = sum(1 for c in completed_list if c.startswith("device=")) - if dev_count < sbd.SBDManager.SBD_DEVICE_MAX: - parameters_pool.append("device=") - return parameters_pool @@ -384,6 +385,78 @@ def _configure_diskless(self, parameter_dict: dict): ) sbd_manager.init_and_deploy_sbd() + def _device_add(self, devices_to_add: typing.List[str]): + ''' + Implement sbd device add command, add devices to sbd configuration + ''' + all_device_list = self.device_list_from_config + devices_to_add + sbd.SBDUtils.verify_sbd_device(all_device_list) + + logger.info("Append devices: %s", ';'.join(devices_to_add)) + update_dict 
= {"SBD_DEVICE": ";".join(all_device_list)} + sbd_manager = sbd.SBDManager( + device_list_to_init=devices_to_add, + update_dict=update_dict, + timeout_dict=self.device_meta_dict_runtime + ) + sbd_manager.init_and_deploy_sbd() + + def _device_remove(self, devices_to_remove: typing.List[str]): + ''' + Implement sbd device remove command, remove devices from sbd configuration + ''' + for dev in devices_to_remove: + if dev not in self.device_list_from_config: + raise self.SyntaxError(f"Device {dev} is not in config") + # To keep the order of devices during removal + left_device_list = [dev for dev in self.device_list_from_config if dev not in devices_to_remove] + if len(left_device_list) == 0: + raise self.SyntaxError(f"Not allowed to remove all devices") + + logger.info("Remove devices: %s", ';'.join(devices_to_remove)) + update_dict = {"SBD_DEVICE": ";".join(left_device_list)} + sbd.SBDManager.update_sbd_configuration(update_dict) + logger.info('%s', self.RESTART_INFO) + + @command.completers_repeating(sbd_device_completer) + def do_device(self, context, *args) -> bool: + ''' + Implement sbd device command + ''' + if not ServiceManager().service_is_active(constants.PCMK_SERVICE): + logger.error("%s is not active", constants.PCMK_SERVICE) + return False + if not sbd.SBDUtils.is_using_disk_based_sbd(): + logger.error("Only works for disk-based SBD") + logger.info("Please use 'crm cluster init sbd -s [-s [-s ]]' to configure the disk-based SBD first") + return False + + try: + if not args: + raise self.SyntaxError("No argument") + if args[0] not in ("add", "remove"): + raise self.SyntaxError(f"Invalid argument: {args[0]}") + if len(args) < 2: + raise self.SyntaxError("No device specified") + + self._load_attributes() + logger.info("Configured sbd devices: %s", ';'.join(self.device_list_from_config)) + if len(args) == 2 and ";" in args[1]: + device_list_from_args = args[1].split(";") + else: + device_list_from_args = list(args[1:]) + match args[0]: + case "add": + 
self._device_add(device_list_from_args) + case "remove": + self._device_remove(device_list_from_args) + return True + + except self.SyntaxError as e: + logger.error('%s', e) + logger.info("Usage: crm sbd device ...") + return False + @command.completers_repeating(sbd_configure_completer) def do_configure(self, context, *args) -> bool: ''' @@ -417,7 +490,6 @@ def do_configure(self, context, *args) -> bool: print(self.configure_usage) return False - @command.completers_repeating(sbd_devices_completer) def do_remove(self, context, *args) -> bool: ''' Implement sbd remove command From fb4192734ed4e33e740a114e6279ca68a730c098 Mon Sep 17 00:00:00 2001 From: xin liang Date: Mon, 14 Oct 2024 15:19:47 +0800 Subject: [PATCH 17/36] Dev: ui_sbd: Replace sbd remove as sbd disable sub-command --- crmsh/ui_sbd.py | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index ff8e90e5f6..57befb606f 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -78,8 +78,9 @@ class SBD(command.UI): Includes commands: - sbd configure - - sbd remove + - sbd device - sbd status + - sbd disable ''' name = "sbd" TIMEOUT_TYPES = ("watchdog", "allocate", "loop", "msgwait") @@ -490,37 +491,14 @@ def do_configure(self, context, *args) -> bool: print(self.configure_usage) return False - def do_remove(self, context, *args) -> bool: + def do_disable(self, context) -> bool: ''' - Implement sbd remove command + Implement sbd disable command ''' - self._load_attributes() - - if not self.service_manager.service_is_active(constants.SBD_SERVICE): + if not ServiceManager().service_is_active(constants.SBD_SERVICE): logger.error("%s is not active", constants.SBD_SERVICE) return False - - parameter_dict = self._parse_args(args) - dev_list = parameter_dict.get("device-list", []) - if dev_list: - if not self.device_list_from_config: - logger.error("No sbd device found in config") - return False - for dev in dev_list: - if dev not in 
self.device_list_from_config: - logger.error("Device %s is not in config", dev) - return False - changed_dev_list = set(self.device_list_from_config) - set(dev_list) - # remove part of devices from config - if changed_dev_list: - logger.info("Remove '%s' from %s", ";".join(dev_list), sbd.SBDManager.SYSCONFIG_SBD) - sbd.SBDManager.update_sbd_configuration({"SBD_DEVICE": ";".join(changed_dev_list)}) - # remove all devices, equivalent to stop sbd.service - else: - sbd.disable_sbd_from_cluster() - else: - sbd.disable_sbd_from_cluster() - + sbd.disable_sbd_from_cluster() logger.info('%s', self.RESTART_INFO) return True From 8604f98f32b360a7c8aea1a6a85d9203f6c02e13 Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 15 Oct 2024 09:36:00 +0800 Subject: [PATCH 18/36] Dev: ui_sbd: Adjust sbd configure interface After adding sbd device interface to manage devices, related functionalities inside sbd configure interface should be adjusted --- crmsh/sbd.py | 18 +++--- crmsh/ui_sbd.py | 160 +++++++++++++++--------------------------------- crmsh/utils.py | 7 --- 3 files changed, 55 insertions(+), 130 deletions(-) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index e8e462a1a6..8758b61060 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -86,25 +86,23 @@ def get_sbd_device_from_config(): res = SBDUtils.get_sbd_value_from_config("SBD_DEVICE") return res.split(';') if res else [] - @staticmethod - def _is_sbd_running_with_device(): - if not ServiceManager().service_is_active(constants.SBD_SERVICE): - return False - return bool(SBDUtils.get_sbd_device_from_config()) - @staticmethod def is_using_diskless_sbd(): ''' Check if using diskless SBD ''' - return not SBDUtils._is_sbd_running_with_device() + if not ServiceManager().service_is_active(constants.SBD_SERVICE): + return False + return not bool(SBDUtils.get_sbd_device_from_config()) @staticmethod def is_using_disk_based_sbd(): ''' Check if using disk-based SBD ''' - return SBDUtils._is_sbd_running_with_device() + if not
ServiceManager().service_is_active(constants.SBD_SERVICE): + return False + return bool(SBDUtils.get_sbd_device_from_config()) @staticmethod def has_sbd_device_already_initialized(dev) -> bool: @@ -414,7 +412,6 @@ def __init__( timeout_dict: typing.Dict[str, int] | None = None, update_dict: typing.Dict[str, str] | None = None, no_overwrite_dev_map: typing.Dict[str, bool] | None = None, - new_config: bool = False, diskless_sbd: bool = False, bootstrap_context: 'bootstrap.Context | None' = None ): @@ -428,7 +425,6 @@ def __init__( self.cluster_is_running = ServiceManager().service_is_active(constants.PCMK_SERVICE) self.bootstrap_context = bootstrap_context self.no_overwrite_dev_map = no_overwrite_dev_map or {} - self.new_config = new_config # From bootstrap init or join process, override the values if self.bootstrap_context: @@ -464,7 +460,7 @@ def update_configuration(self) -> None: ''' if not self.update_dict: return - if (self.bootstrap_context and self.bootstrap_context.type == "init") or self.new_config: + if self.bootstrap_context and self.bootstrap_context.type == "init": utils.copy_local_file(self.SYSCONFIG_SBD_TEMPLATE, self.SYSCONFIG_SBD) for key, value in self.update_dict.items(): diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 57befb606f..7d91e79d79 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -42,25 +42,31 @@ def sbd_configure_completer(completed_list: typing.List[str]) -> typing.List[str ''' completion for sbd configure command ''' - if not ServiceManager().service_is_active(constants.PCMK_SERVICE): + service_manager = ServiceManager() + pcmk_is_active = service_manager.service_is_active(constants.PCMK_SERVICE) + sbd_is_active = service_manager.service_is_active(constants.SBD_SERVICE) + if not pcmk_is_active or not sbd_is_active: return [] - sbd_service_is_enabled = service_manager.service_is_enabled(constants.SBD_SERVICE) - dev_list = sbd.SBDUtils.get_sbd_device_from_config() - # Show disk-based sbd configure options - # if there are 
devices in config or sbd.service is not enabled - is_diskbased = bool(dev_list) or not sbd_service_is_enabled - parameters_pool = [] - if completed_list[1] == '': - parameters_pool = ["show"] - elif completed_list[1] == "show": + is_diskbased = sbd.SBDUtils.is_using_disk_based_sbd() + is_diskless = sbd.SBDUtils.is_using_diskless_sbd() + show_types, timeout_types = (), () + if is_diskbased: + show_types = SBD.SHOW_TYPES + timeout_types = SBD.TIMEOUT_TYPES + elif is_diskless: + show_types = SBD.DISKLESS_SHOW_TYPES + timeout_types = SBD.DISKLESS_TIMEOUT_TYPES + + if completed_list[1] == "show": if len(completed_list) == 3: - show_types = SBD.SHOW_TYPES if is_diskbased else SBD.DISKLESS_SHOW_TYPES return [t for t in show_types if t not in completed_list] else: return [] - timeout_types = SBD.TIMEOUT_TYPES if is_diskbased else SBD.DISKLESS_TIMEOUT_TYPES + parameters_pool = [] + if completed_list[1] == '': + parameters_pool = ["show"] parameters_pool.extend([f"{t}-timeout=" for t in timeout_types]) parameters_pool.append("watchdog-device=") parameters_pool = [ @@ -68,7 +74,6 @@ def sbd_configure_completer(completed_list: typing.List[str]) -> typing.List[str for p in parameters_pool if not any(c.startswith(p) for c in completed_list) ] - return parameters_pool @@ -96,15 +101,9 @@ class SBD(command.UI): "priority-fencing-delay", "pcmk_delay_max" ) - # a commom character class for matching device path - dev_char_class = r'[\w/\d;\-:.]' PARSE_RE = re.compile( - # Match "device" key with any value, including empty - fr'(device)=("[^"]*"|{dev_char_class}*)' - # Match other keys with non-empty values, capturing possible suffix - r'|(\w+)(?:-(\w+))?=("[^"]+"|[\w/\d;]+)' - # Match standalone device path - fr'|(/dev/{dev_char_class}+)' + # Match keys with non-empty values, capturing possible suffix + r'(\w+)(?:-(\w+))?=("[^"]+"|[\w/\d;]+)' ) class SyntaxError(Exception): @@ -153,18 +152,17 @@ def configure_usage(self) -> str: Build usage string for sbd configure command, including 
disk-based and diskless sbd cases ''' - def build_timeout_usage_str(timeout_types: tuple[str, ...]) -> str: - return " ".join([f"[{t}-timeout=]" for t in timeout_types]) - timeout_usage_str = build_timeout_usage_str(self.TIMEOUT_TYPES) - timeout_usage_str_diskless = build_timeout_usage_str(self.DISKLESS_TIMEOUT_TYPES) - show_usage_str = f"[{'|'.join(self.SHOW_TYPES)}]" - show_usage_str_diskless = f"[{'|'.join(self.DISKLESS_SHOW_TYPES)}]" - return ("Usage for disk-based SBD:\n" - f"crm sbd configure show {show_usage_str}\n" - f"crm sbd configure [device=]... {timeout_usage_str} [watchdog-device=]\n\n" - "Usage for diskless SBD:\n" - f"crm sbd configure show {show_usage_str_diskless}\n" - f"crm sbd configure device=\"\" {timeout_usage_str_diskless} [watchdog-device=]\n") + timeout_types, show_types = (), () + if sbd.SBDUtils.is_using_disk_based_sbd(): + timeout_types, show_types = self.TIMEOUT_TYPES, self.SHOW_TYPES + elif sbd.SBDUtils.is_using_diskless_sbd(): + timeout_types, show_types = self.DISKLESS_TIMEOUT_TYPES, self.DISKLESS_SHOW_TYPES + else: + return "" + + timeout_usage_str = " ".join([f"[{t}-timeout=]" for t in timeout_types]) + show_usage = f"crm sbd configure show [{'|'.join(show_types)}]" + return f"Usage:\n{show_usage}\ncrm sbd configure {timeout_usage_str} [watchdog-device=]\n" @staticmethod def _show_sysconfig() -> None: @@ -230,36 +228,19 @@ def _configure_show(self, args) -> None: print() self._show_property() - def _parse_args(self, args: tuple[str, ...]) -> dict[str, int|str|list[str]]: + def _parse_args(self, args: tuple[str, ...]) -> dict[str, int|str]: ''' Parse arguments and verify them - - Possible arguments format like: - device="/dev/sdb5;/dev/sda6" - device="" watchdog-timeout=10 - /dev/sda5 watchdog-timeout=10 watchdog-device=/dev/watchdog - device=/dev/sdb5 device=/dev/sda6 watchdog-timeout=10 msgwait-timeout=20 ''' - parameter_dict = {"device-list": []} + parameter_dict = {} for arg in args: match = self.PARSE_RE.match(arg) if not 
match: raise self.SyntaxError(f"Invalid argument: {arg}") - device_key, device_value, key, suffix, value, device_path = match.groups() - - # device= parameter - if device_key: - if device_value: - parameter_dict.setdefault("device-list", []).extend(device_value.split(";")) - # explicitly set empty value, stands for diskless sbd - elif not parameter_dict.get("device-list"): - parameter_dict.pop("device-list", None) - # standalone device parameter - elif device_path: - parameter_dict.setdefault("device-list", []).append(device_path) + key, suffix, value = match.groups() # timeout related parameters - elif key in self.TIMEOUT_TYPES and suffix and suffix == "timeout": + if key in self.TIMEOUT_TYPES and suffix and suffix == "timeout": if not value.isdigit(): raise self.SyntaxError(f"Invalid timeout value: {value}") parameter_dict[key] = int(value) @@ -272,12 +253,6 @@ def _parse_args(self, args: tuple[str, ...]) -> dict[str, int|str|list[str]]: watchdog_device = parameter_dict.get("watchdog-device") parameter_dict["watchdog-device"] = watchdog.Watchdog.get_watchdog_device(watchdog_device) - # No need to specify device="" when trying to modify properties under diskless sbd - if sbd.SBDUtils.is_using_diskless_sbd() \ - and "device-list" in parameter_dict \ - and not parameter_dict["device-list"]: - parameter_dict.pop("device-list") - logger.debug("Parsed arguments: %s", parameter_dict) return parameter_dict @@ -307,43 +282,11 @@ def _configure_diskbase(self, parameter_dict: dict): ''' Configure disk-based SBD based on input parameters and runtime config ''' - if not self.device_list_from_config: - self.watchdog_timeout_from_config = None - self.watchdog_device_from_config = None - update_dict = {} - device_list = parameter_dict.get("device-list", []) - if not device_list and not self.device_list_from_config: - raise self.SyntaxError("No device specified") - if len(device_list) > len(set(device_list)): - raise self.SyntaxError("Duplicate device") watchdog_device = 
parameter_dict.get("watchdog-device") if watchdog_device != self.watchdog_device_from_config: update_dict["SBD_WATCHDOG_DEV"] = watchdog_device timeout_dict = {k: v for k, v in parameter_dict.items() if k in self.TIMEOUT_TYPES} - - all_device_list = list( - dict.fromkeys(self.device_list_from_config + device_list) - ) - sbd.SBDUtils.verify_sbd_device(all_device_list) - - new_device_list = list( - set(device_list) - set(self.device_list_from_config) - ) - no_overwrite_dev_map : dict[str, bool] = { - dev: sbd.SBDUtils.no_overwrite_device_check(dev) for dev in new_device_list - } - if new_device_list: - update_dict["SBD_DEVICE"] = ";".join(all_device_list) - - device_list_to_init = [] - # initialize new devices only if no timeout parameter specified or timeout parameter is already in runtime config - if not timeout_dict or utils.is_subdict(timeout_dict, self.device_meta_dict_runtime): - device_list_to_init = new_device_list - # initialize all devices - else: - device_list_to_init = all_device_list - # merge runtime timeout dict with new timeout dict timeout_dict = self.device_meta_dict_runtime | timeout_dict # adjust watchdog and msgwait timeout @@ -353,11 +296,9 @@ def _configure_diskbase(self, parameter_dict: dict): update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) sbd_manager = sbd.SBDManager( - device_list_to_init=device_list_to_init, + device_list_to_init=self.device_list_from_config, timeout_dict=timeout_dict, - update_dict=update_dict, - no_overwrite_dev_map=no_overwrite_dev_map, - new_config=False if self.device_list_from_config else True + update_dict=update_dict ) sbd_manager.init_and_deploy_sbd() @@ -365,11 +306,6 @@ def _configure_diskless(self, parameter_dict: dict): ''' Configure diskless SBD based on input parameters and runtime config ''' - if self.device_list_from_config: - self.watchdog_timeout_from_config = None - self.watchdog_device_from_config = None - sbd.clean_up_existing_sbd_resource() - update_dict = {} parameter_dict = 
self._adjust_timeout_dict(parameter_dict, diskless=True) watchdog_timeout = parameter_dict.get("watchdog") @@ -381,8 +317,7 @@ def _configure_diskless(self, parameter_dict: dict): sbd_manager = sbd.SBDManager( update_dict=update_dict, - diskless_sbd=True, - new_config=True if self.device_list_from_config else False + diskless_sbd=True ) sbd_manager.init_and_deploy_sbd() @@ -466,6 +401,11 @@ def do_configure(self, context, *args) -> bool: self._load_attributes() try: + for service in (constants.PCMK_SERVICE, constants.SBD_SERVICE): + if not self.service_manager.service_is_active(service): + logger.error("%s is not active", service) + return False + if not args: raise self.SyntaxError("No argument") @@ -473,22 +413,18 @@ def do_configure(self, context, *args) -> bool: self._configure_show(args) return True - if not self.service_manager.service_is_active(constants.PCMK_SERVICE): - logger.error("%s is not active", constants.PCMK_SERVICE) - return False - parameter_dict = self._parse_args(args) - # disk-based sbd case - if "device-list" in parameter_dict: + if sbd.SBDUtils.is_using_disk_based_sbd(): self._configure_diskbase(parameter_dict) - # diskless sbd case - else: + elif sbd.SBDUtils.is_using_diskless_sbd(): self._configure_diskless(parameter_dict) return True except self.SyntaxError as e: logger.error('%s', e) - print(self.configure_usage) + usage = self.configure_usage + if usage: + print(usage) return False def do_disable(self, context) -> bool: diff --git a/crmsh/utils.py b/crmsh/utils.py index 6a253c0982..4d6940e1dc 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -3207,13 +3207,6 @@ def cleanup_stonith_related_properties(): set_property("stonith-enabled", "false") -def is_subdict(sub_dict, main_dict): - """ - Check if sub_dict is a sub-dictionary of main_dict - """ - return all(item in main_dict.items() for item in sub_dict.items()) - - def strip_ansi_escape_sequences(text): """ Remove ANSI escape sequences from text From 
8b3cc6e140b0bf7a8818d538323197e5a98b9541 Mon Sep 17 00:00:00 2001 From: xin liang Date: Thu, 17 Oct 2024 11:07:33 +0800 Subject: [PATCH 19/36] Dev: ui_sbd: Check if the adding device is already initialized and make sure the metadata is consistent between devices. --- crmsh/sbd.py | 128 +++++++++++++++++++++++++++++++----------------- crmsh/ui_sbd.py | 7 ++- 2 files changed, 90 insertions(+), 45 deletions(-) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index 8758b61060..6ba4684b76 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -120,7 +120,7 @@ def no_overwrite_device_check(dev) -> bool: ''' initialized = SBDUtils.has_sbd_device_already_initialized(dev) return initialized and \ - not bootstrap.confirm(f"{dev} has already been initialized by SBD, do you want to overwrite it?") + not bootstrap.confirm(f"{dev} has already been initialized by SBD - overwrite?") @staticmethod def check_devices_metadata_consistent(dev_list) -> bool: @@ -131,12 +131,42 @@ def check_devices_metadata_consistent(dev_list) -> bool: if len(dev_list) < 2: return consistent first_dev_metadata = SBDUtils.get_sbd_device_metadata(dev_list[0], timeout_only=True) + if not first_dev_metadata: + logger.warning(f"Cannot get metadata for {dev_list[0]}") + return False for dev in dev_list[1:]: - if SBDUtils.get_sbd_device_metadata(dev, timeout_only=True) != first_dev_metadata: + this_dev_metadata = SBDUtils.get_sbd_device_metadata(dev, timeout_only=True) + if not this_dev_metadata: + logger.warning(f"Cannot get metadata for {dev}") + return False + if this_dev_metadata != first_dev_metadata: logger.warning(f"Device {dev} doesn't have the same metadata as {dev_list[0]}") consistent = False return consistent + @staticmethod + def handle_input_sbd_devices(dev_list, dev_list_from_config=None): + ''' + Given a list of devices, split them into two lists: + - overwrite_list: devices that need to be overwritten + - no_overwrite_list: devices that don't need to be overwritten + + Raise TerminateSubCommand if the 
metadata of no_overwrite_list is not consistent + ''' + no_overwrite_list = dev_list_from_config or [] + overwrite_list = [] + + for dev in dev_list: + if SBDUtils.no_overwrite_device_check(dev): + no_overwrite_list.append(dev) + else: + overwrite_list.append(dev) + + if no_overwrite_list and not SBDUtils.check_devices_metadata_consistent(no_overwrite_list): + raise utils.TerminateSubCommand + + return overwrite_list, no_overwrite_list + class SBDTimeout(object): ''' @@ -406,12 +436,14 @@ class SBDManager: SBD_RA_ID = "stonith-sbd" SBD_DEVICE_MAX = 3 + class NotConfigSBD(Exception): + pass + def __init__( self, device_list_to_init: typing.List[str] | None = None, timeout_dict: typing.Dict[str, int] | None = None, update_dict: typing.Dict[str, str] | None = None, - no_overwrite_dev_map: typing.Dict[str, bool] | None = None, diskless_sbd: bool = False, bootstrap_context: 'bootstrap.Context | None' = None ): @@ -424,23 +456,24 @@ def __init__( self.diskless_sbd = diskless_sbd self.cluster_is_running = ServiceManager().service_is_active(constants.PCMK_SERVICE) self.bootstrap_context = bootstrap_context - self.no_overwrite_dev_map = no_overwrite_dev_map or {} + self.overwrite_sysconfig = False # From bootstrap init or join process, override the values if self.bootstrap_context: - self.device_list_to_init = self.bootstrap_context.sbd_devices + self.overwrite_sysconfig = self.bootstrap_context.type == "init" self.diskless_sbd = self.bootstrap_context.diskless_sbd self.cluster_is_running = self.bootstrap_context.cluster_is_running def _load_attributes_from_bootstrap(self): - if not self.bootstrap_context: + if not self.bootstrap_context or not self.overwrite_sysconfig: return - timeout_inst = SBDTimeout(self.bootstrap_context) - timeout_inst.initialize_timeout() - self.timeout_dict["watchdog"] = timeout_inst.sbd_watchdog_timeout - if not self.diskless_sbd: - self.timeout_dict["msgwait"] = timeout_inst.sbd_msgwait - self.update_dict["SBD_WATCHDOG_TIMEOUT"] = 
str(timeout_inst.sbd_watchdog_timeout) + if not self.timeout_dict: + timeout_inst = SBDTimeout(self.bootstrap_context) + timeout_inst.initialize_timeout() + self.timeout_dict["watchdog"] = timeout_inst.sbd_watchdog_timeout + if not self.diskless_sbd: + self.timeout_dict["msgwait"] = timeout_inst.sbd_msgwait + self.update_dict["SBD_WATCHDOG_TIMEOUT"] = str(timeout_inst.sbd_watchdog_timeout) self.update_dict["SBD_WATCHDOG_DEV"] = watchdog.Watchdog.get_watchdog_device(self.bootstrap_context.watchdog) @staticmethod @@ -460,7 +493,7 @@ def update_configuration(self) -> None: ''' if not self.update_dict: return - if self.bootstrap_context and self.bootstrap_context.type == "init": + if self.overwrite_sysconfig: utils.copy_local_file(self.SYSCONFIG_SBD_TEMPLATE, self.SYSCONFIG_SBD) for key, value in self.update_dict.items(): @@ -479,22 +512,19 @@ def initialize_sbd(self): logger.info("Configuring diskless SBD") self._warn_diskless_sbd() return - elif not all(self.no_overwrite_dev_map.values()): + elif self.device_list_to_init: logger.info("Configuring disk-based SBD") + else: + return opt_str = SBDManager.convert_timeout_dict_to_opt_str(self.timeout_dict) shell = sh.cluster_shell() for dev in self.device_list_to_init: - # skip if device already initialized and not overwrite - if dev in self.no_overwrite_dev_map and self.no_overwrite_dev_map[dev]: - continue logger.info("Initializing SBD device %s", dev) cmd = f"sbd {opt_str} -d {dev} create" logger.debug("Running command: %s", cmd) shell.get_stdout_or_raise_error(cmd) - SBDUtils.check_devices_metadata_consistent(self.device_list_to_init) - @staticmethod def enable_sbd_service(): cluster_nodes = utils.list_cluster_nodes() or [utils.this_node()] @@ -547,20 +577,22 @@ def get_sbd_device_interactive(self): ''' if self.bootstrap_context.yes_to_all: logger.warning('%s', self.NO_SBD_WARNING) - return + raise self.NotConfigSBD logger.info(self.SBD_STATUS_DESCRIPTION) if not bootstrap.confirm("Do you wish to use SBD?"): 
logger.warning('%s', self.NO_SBD_WARNING) - return - + raise self.NotConfigSBD if not utils.package_is_installed("sbd"): utils.fatal(self.SBD_NOT_INSTALLED_MSG) configured_devices = SBDUtils.get_sbd_device_from_config() - for dev in configured_devices: - self.no_overwrite_dev_map[dev] = SBDUtils.no_overwrite_device_check(dev) - if self.no_overwrite_dev_map and all(self.no_overwrite_dev_map.values()): - return configured_devices + if configured_devices: + wants_to_overwrite_msg = f"SBD_DEVICE in {self.SYSCONFIG_SBD} is already configured to use '{';'.join(configured_devices)}' - overwrite?" + if not bootstrap.confirm(wants_to_overwrite_msg): + if not SBDUtils.check_devices_metadata_consistent(configured_devices): + raise utils.TerminateSubCommand + self.overwrite_sysconfig = False + return dev_list = [] dev_looks_sane = False @@ -577,18 +609,16 @@ def get_sbd_device_interactive(self): logger.error('%s', e) continue for dev in dev_list: - if dev not in self.no_overwrite_dev_map: - self.no_overwrite_dev_map[dev] = SBDUtils.no_overwrite_device_check(dev) - if self.no_overwrite_dev_map[dev]: - if dev == dev_list[-1]: - return dev_list - continue - logger.warning("All data on %s will be destroyed", dev) - if bootstrap.confirm('Are you sure you wish to use this device?'): + if SBDUtils.has_sbd_device_already_initialized(dev): dev_looks_sane = True + continue else: - dev_looks_sane = False - break + logger.warning("All data on %s will be destroyed", dev) + if bootstrap.confirm('Are you sure you wish to use this device?'): + dev_looks_sane = True + else: + dev_looks_sane = False + break return dev_list @@ -598,12 +628,21 @@ def get_sbd_device_from_bootstrap(self): -s is for disk-based sbd -S is for diskless sbd ''' - # specified sbd device with -s option - if self.device_list_to_init: - self.update_dict["SBD_DEVICE"] = ';'.join(self.device_list_to_init) - # no -s and no -S option - elif not self.diskless_sbd: - self.device_list_to_init = self.get_sbd_device_interactive() + 
# if specified sbd device with -s option + device_list = self.bootstrap_context.sbd_devices + # else if not use -S option, get sbd device interactively + if not device_list and not self.bootstrap_context.diskless_sbd: + device_list = self.get_sbd_device_interactive() + if not device_list: + return + + # get two lists of devices, one for overwrite, one for no overwrite with consistent metadata + overwrite_list, no_overwrite_list = SBDUtils.handle_input_sbd_devices(device_list) + self.device_list_to_init = overwrite_list + # if no_overwrite_list is not empty, get timeout metadata from the first device + if no_overwrite_list: + self.timeout_dict = SBDUtils.get_sbd_device_metadata(no_overwrite_list[0], timeout_only=True) + self.update_dict["SBD_DEVICE"] = ';'.join(device_list) def init_and_deploy_sbd(self): ''' @@ -615,8 +654,9 @@ def init_and_deploy_sbd(self): 5. Configure stonith-sbd resource and related properties ''' if self.bootstrap_context: - self.get_sbd_device_from_bootstrap() - if not self.device_list_to_init and not self.diskless_sbd: + try: + self.get_sbd_device_from_bootstrap() + except self.NotConfigSBD: ServiceManager().disable_service(constants.SBD_SERVICE) return self._load_attributes_from_bootstrap() diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 7d91e79d79..52261bce09 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -328,10 +328,15 @@ def _device_add(self, devices_to_add: typing.List[str]): all_device_list = self.device_list_from_config + devices_to_add sbd.SBDUtils.verify_sbd_device(all_device_list) + devices_to_init, _ = sbd.SBDUtils.handle_input_sbd_devices( + devices_to_add, + self.device_list_from_config + ) + logger.info("Append devices: %s", ';'.join(devices_to_add)) update_dict = {"SBD_DEVICE": ";".join(all_device_list)} sbd_manager = sbd.SBDManager( - device_list_to_init=devices_to_add, + device_list_to_init=devices_to_init, update_dict=update_dict, timeout_dict=self.device_meta_dict_runtime ) From 
820e4abdcb89a046221da6388e8ba70ceab06589 Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 23 Oct 2024 15:32:59 +0800 Subject: [PATCH 20/36] Dev: bootstrap: Add a log info when starting pacemaker.service Add a log message to indicate the start of pacemaker.service. This helps users understand that the system is not hanging but is actually starting pacemaker, especially when SBD_DELAY_START is set and it takes longer to start pacemaker. --- crmsh/bootstrap.py | 1 + 1 file changed, 1 insertion(+) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 532eeceb85..453cf1cd6d 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -793,6 +793,7 @@ def start_pacemaker(node_list=[], enable_flag=False): except ValueError as err: node_list.remove(node) logger.error(err) + logger.info("Starting %s on %s", constants.PCMK_SERVICE, ', '.join(node_list) or utils.this_node()) return service_manager.start_service("pacemaker.service", enable=enable_flag, node_list=node_list) From 60c7b3f5cdc783b8af5187de0f34a873176b5526 Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 29 Oct 2024 09:20:51 +0800 Subject: [PATCH 21/36] Dev: ui_sbd: Reuse sbd.SBDManager.restart_cluster_if_possible to avoid duplicate info message. 
--- crmsh/ui_sbd.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 52261bce09..d44eeee754 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -92,7 +92,6 @@ class SBD(command.UI): DISKLESS_TIMEOUT_TYPES = ("watchdog",) SHOW_TYPES = ("disk_metadata", "sysconfig", "property") DISKLESS_SHOW_TYPES = ("sysconfig", "property") - RESTART_INFO = "Requires to restart cluster service to take effect" PCMK_ATTRS = ( "have-watchdog", "stonith-timeout", @@ -357,7 +356,7 @@ def _device_remove(self, devices_to_remove: typing.List[str]): logger.info("Remove devices: %s", ';'.join(devices_to_remove)) update_dict = {"SBD_DEVICE": ";".join(left_device_list)} sbd.SBDManager.update_sbd_configuration(update_dict) - logger.info('%s', self.RESTART_INFO) + sbd.SBDManager.restart_cluster_if_possible() @command.completers_repeating(sbd_device_completer) def do_device(self, context, *args) -> bool: @@ -440,7 +439,7 @@ def do_disable(self, context) -> bool: logger.error("%s is not active", constants.SBD_SERVICE) return False sbd.disable_sbd_from_cluster() - logger.info('%s', self.RESTART_INFO) + sbd.SBDManager.restart_cluster_if_possible() return True def _print_sbd_type(self): From 9f1b89a2b03608b5c163c68108d45cc844bd7d81 Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 29 Oct 2024 09:34:09 +0800 Subject: [PATCH 22/36] Dev: ui_sbd: Check if node is reachable when getting the node list --- crmsh/ui_sbd.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index d44eeee754..8d010b5729 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -134,6 +134,12 @@ def _load_attributes(self): self.service_manager = ServiceManager() self.cluster_shell = sh.cluster_shell() self.cluster_nodes = utils.list_cluster_nodes() or [utils.this_node()] + for node in self.cluster_nodes[:]: + try: + utils.node_reachable_check(node) + except Exception as e: + logger.error(e) + 
self.cluster_nodes.remove(node) self.crm_mon_xml_parser = xmlutil.CrmMonXmlParser() def requires(self) -> bool: @@ -351,7 +357,7 @@ def _device_remove(self, devices_to_remove: typing.List[str]): # To keep the order of devices during removal left_device_list = [dev for dev in self.device_list_from_config if dev not in devices_to_remove] if len(left_device_list) == 0: - raise self.SyntaxError(f"Not allowed to remove all devices") + raise self.SyntaxError("Not allowed to remove all devices") logger.info("Remove devices: %s", ';'.join(devices_to_remove)) update_dict = {"SBD_DEVICE": ";".join(left_device_list)} @@ -474,9 +480,9 @@ def _print_sbd_status(self): def _print_watchdog_info(self): padding = 2 max_node_len = max(len(node) for node in self.cluster_nodes) + padding - - watchdog_sbd_re = "\[[0-9]+\] (/dev/.*)\nIdentity: Busy: .*sbd.*\nDriver: (.*)" + watchdog_sbd_re = r"\[[0-9]+\] (/dev/.*)\nIdentity: Busy: .*sbd.*\nDriver: (.*)" device_list, driver_list, kernel_timeout_list = [], [], [] + cluster_nodes = self.cluster_nodes[:] for node in cluster_nodes[:]: out = self.cluster_shell.get_stdout_or_raise_error("sbd query-watchdog", node) From 71fcc4c56543658e49190ab503854a0a577df834 Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 29 Oct 2024 09:44:14 +0800 Subject: [PATCH 23/36] Dev: sbd: Move constants.SHOW_SBD_START_TIMEOUT_CMD to sbd.py --- crmsh/constants.py | 2 -- crmsh/sbd.py | 3 ++- crmsh/ui_sbd.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crmsh/constants.py b/crmsh/constants.py index 0106e29bea..89686aa83d 100644 --- a/crmsh/constants.py +++ b/crmsh/constants.py @@ -453,6 +453,4 @@ PCMK_SERVICE = "pacemaker.service" SBD_SERVICE = "sbd.service" - -SHOW_SBD_START_TIMEOUT_CMD = "systemctl show -p TimeoutStartUSec sbd.service --value" # vim:ts=4:sw=4:et: diff --git a/crmsh/sbd.py b/crmsh/sbd.py index 6ba4684b76..0940e93af0 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -177,6 +177,7 @@ class SBDTimeout(object): 
SBD_WATCHDOG_TIMEOUT_DEFAULT_S390 = 15 SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE = 35 QDEVICE_SYNC_TIMEOUT_MARGIN = 5 + SHOW_SBD_START_TIMEOUT_CMD = "systemctl show -p TimeoutStartUSec sbd.service --value" def __init__(self, context=None): ''' @@ -363,7 +364,7 @@ def is_sbd_delay_start(): @staticmethod def get_sbd_systemd_start_timeout() -> int: - out = sh.cluster_shell().get_stdout_or_raise_error(constants.SHOW_SBD_START_TIMEOUT_CMD) + out = sh.cluster_shell().get_stdout_or_raise_error(SBDTimeout.SHOW_SBD_START_TIMEOUT_CMD) return utils.get_systemd_timeout_start_in_sec(out) def adjust_systemd_start_timeout(self): diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 8d010b5729..444dd15fc7 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -208,7 +208,7 @@ def _show_property(self) -> None: print(f"{match[0]}={match[1]}") print() - logger.info('%s', constants.SHOW_SBD_START_TIMEOUT_CMD) + logger.info('%s', sbd.SBDTimeout.SHOW_SBD_START_TIMEOUT_CMD) systemd_start_timeout = sbd.SBDTimeout.get_sbd_systemd_start_timeout() print(f"TimeoutStartUSec={systemd_start_timeout}") From 6aa521a7c5eab6577d6d17f81fa08615586d4b9d Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 29 Oct 2024 10:04:46 +0800 Subject: [PATCH 24/36] Dev: sh: Add get_rc_output_without_input in ClusterShell to redirect stderr to stdout. 
--- crmsh/sh.py | 8 ++++++++ crmsh/ui_sbd.py | 8 +++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/crmsh/sh.py b/crmsh/sh.py index 75e3b2b281..c4f537eb37 100644 --- a/crmsh/sh.py +++ b/crmsh/sh.py @@ -384,6 +384,14 @@ def get_rc_stdout_stderr_without_input(self, host, cmd) -> typing.Tuple[int, str rc, stdout, stderr = self.get_rc_stdout_stderr_raw_without_input(host, cmd) return rc, Utils.decode_str(stdout).strip(), Utils.decode_str(stderr).strip() + def get_rc_output_without_input(self, host, cmd) -> typing.Tuple[int, str]: + result = self.subprocess_run_without_input( + host, None, cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT + ) + return result.returncode, Utils.decode_str(result.stdout).strip() + def get_stdout_or_raise_error( self, cmd: str, diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 444dd15fc7..a58a925dd1 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -512,11 +512,9 @@ def _print_sbd_agent_status(self): print("# Status of fence_sbd:") sbd_id_list = self.crm_mon_xml_parser.get_resource_id_list_via_type(sbd.SBDManager.SBD_RA) for sbd_id in sbd_id_list: - rc, out, err = self.cluster_shell.get_rc_stdout_stderr_without_input(None, f"crm resource status {sbd_id}") - if out: - print(out) - if err: - print(err) + rc, output = self.cluster_shell.get_rc_output_without_input(None, f"crm resource status {sbd_id}") + if output: + print(output) def do_status(self, context) -> bool: ''' From 0705d0dd5024f633aabf257ca6a6ba8aa38d755e Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 13 Nov 2024 14:20:18 +0800 Subject: [PATCH 25/36] Dev: ui_sbd: Replace 'sbd disable' as 'sbd purge' And the `sbd purge` command will also move /etc/sysconfig/sbd to /etc/sysconfig/sbd.bak on all nodes. 
--- crmsh/sbd.py | 18 +++++++----------- crmsh/ui_sbd.py | 34 ++++++++++++++++------------------ 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index 0940e93af0..9402ea4d9f 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -707,20 +707,12 @@ def clean_up_existing_sbd_resource(): utils.ext_cmd("crm configure delete {}".format(' '.join(sbd_id_list))) -def enable_sbd_on_cluster(): - cluster_nodes = utils.list_cluster_nodes() - service_manager = ServiceManager() - for node in cluster_nodes: - if not service_manager.service_is_enabled(constants.SBD_SERVICE, node): - logger.info("Enable %s on node %s", constants.SBD_SERVICE, node) - service_manager.enable_service(constants.SBD_SERVICE, node) - - -def disable_sbd_from_cluster(): +def purge_sbd_from_cluster(): ''' - Disable SBD from cluster, the process includes: + Purge SBD from cluster, the process includes: - stop and remove sbd agent - disable sbd.service + - move /etc/sysconfig/sbd to /etc/sysconfig/sbd.bak - adjust cluster attributes - adjust related timeout values ''' @@ -733,6 +725,10 @@ def disable_sbd_from_cluster(): logger.info("Disable %s on node %s", constants.SBD_SERVICE, node) service_manager.disable_service(constants.SBD_SERVICE, node) + config_bak = f"{SBDManager.SYSCONFIG_SBD}.bak" + logger.info("Move %s to %s on all nodes", SBDManager.SYSCONFIG_SBD, config_bak) + utils.cluster_run_cmd(f"mv {SBDManager.SYSCONFIG_SBD} {config_bak}") + out = sh.cluster_shell().get_stdout_or_raise_error("stonith_admin -L") res = re.search("([0-9]+) fence device[s]* found", out) # after disable sbd.service, check if sbd is the last stonith device diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index a58a925dd1..d8b24b6bc4 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -85,7 +85,7 @@ class SBD(command.UI): - sbd configure - sbd device - sbd status - - sbd disable + - sbd purge ''' name = "sbd" TIMEOUT_TYPES = ("watchdog", "allocate", "loop", "msgwait") @@ -118,6 +118,7 @@ def 
__init__(self): self.cluster_nodes: list[str] = None self.crm_mon_xml_parser: xmlutil.CrmMonXmlParser = None + self._load_attributes() command.UI.__init__(self) def _load_attributes(self): @@ -151,6 +152,12 @@ def requires(self) -> bool: return False return True + def service_is_active(self, service: str) -> bool: + if not self.service_manager.service_is_active(service): + logger.error("%s is not active", service) + return False + return True + @property def configure_usage(self) -> str: ''' @@ -369,8 +376,7 @@ def do_device(self, context, *args) -> bool: ''' Implement sbd device command ''' - if not ServiceManager().service_is_active(constants.PCMK_SERVICE): - logger.error("%s is not active", constants.PCMK_SERVICE) + if not self.service_is_active(constants.PCMK_SERVICE): return False if not sbd.SBDUtils.is_using_disk_based_sbd(): logger.error("Only works for disk-based SBD") @@ -385,7 +391,6 @@ def do_device(self, context, *args) -> bool: if len(args) < 2: raise self.SyntaxError("No device specified") - self._load_attributes() logger.info("Configured sbd devices: %s", ';'.join(self.device_list_from_config)) if len(args) == 2 and ";" in args[1]: device_list_from_args = args[1].split(";") @@ -399,7 +404,7 @@ def do_device(self, context, *args) -> bool: return True except self.SyntaxError as e: - logger.error('%s', e) + logger.error('%s', str(e)) logger.info("Usage: crm sbd device ...") return False @@ -408,21 +413,16 @@ def do_configure(self, context, *args) -> bool: ''' Implement sbd configure command ''' - self._load_attributes() - try: for service in (constants.PCMK_SERVICE, constants.SBD_SERVICE): - if not self.service_manager.service_is_active(service): - logger.error("%s is not active", service) + if not self.service_is_active(service): return False - if not args: raise self.SyntaxError("No argument") if args[0] == "show": self._configure_show(args) return True - parameter_dict = self._parse_args(args) if sbd.SBDUtils.is_using_disk_based_sbd(): 
self._configure_diskbase(parameter_dict) @@ -431,20 +431,19 @@ def do_configure(self, context, *args) -> bool: return True except self.SyntaxError as e: - logger.error('%s', e) + logger.error('%s', str(e)) usage = self.configure_usage if usage: print(usage) return False - def do_disable(self, context) -> bool: + def do_purge(self, context) -> bool: ''' - Implement sbd disable command + Implement sbd purge command ''' - if not ServiceManager().service_is_active(constants.SBD_SERVICE): - logger.error("%s is not active", constants.SBD_SERVICE) + if not self.service_is_active(constants.SBD_SERVICE): return False - sbd.disable_sbd_from_cluster() + sbd.purge_sbd_from_cluster() sbd.SBDManager.restart_cluster_if_possible() return True @@ -520,7 +519,6 @@ def do_status(self, context) -> bool: ''' Implement sbd status command ''' - self._load_attributes() self._print_sbd_type() self._print_sbd_status() self._print_watchdog_info() From 9b7f165b135d7f8794e0e8c68a42738ca8c93e4b Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 11 Sep 2024 10:46:19 +0800 Subject: [PATCH 26/36] Dev: doc: Update crm.8.adoc for SBD help text --- doc/crm.8.adoc | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/doc/crm.8.adoc b/doc/crm.8.adoc index e887259fae..220dace365 100644 --- a/doc/crm.8.adoc +++ b/doc/crm.8.adoc @@ -2107,7 +2107,10 @@ utilization xen1 set memory 4096 [[cmdhelp.sbd,SBD management]] === `sbd` - SBD management -This level is for managing the SBD (STONITH Block Device) daemon. +This level displays the real-time SBD status and the static SBD configuration. +Additionally, it manages the configuration file for both disk-based and diskless SBD scenarios, +as well as the on-disk metadata for the disk-based scenario. +Currently, SBD management requires a running cluster. 
[[cmdhelp.sbd.configure,configure SBD]] ==== `configure` @@ -2118,9 +2121,7 @@ Main functionailities include: - Show configured disk metadata - Show contents of /etc/sysconfig/sbd - Show SBD related cluster properties -- Newly setup SBD configuration on a running cluster -- Update the existing parameters -- Add more devices to the existing disk-based SBD configuration +- Update the SBD related configuration parameters For more details on SBD and related parameters, please see man sbd(8). @@ -2128,11 +2129,11 @@ Usage: ............... # For disk-based SBD crm sbd configure show [disk_metadata|sysconfig|property] -crm sbd configure [device=]... [watchdog-device=] [watchdog-timeout=] [allocate-timeout=] [loop-timeout=] [msgwait-timeout=] +crm sbd configure [watchdog-timeout=] [allocate-timeout=] [loop-timeout=] [msgwait-timeout=] [watchdog-device=] # For disk-less SBD crm sbd configure show [sysconfig|property] -crm sbd configure device="" [watchdog-device=] [watchdog-timeout=] +crm sbd configure [watchdog-timeout=] [watchdog-device=] ............... example: @@ -2141,32 +2142,43 @@ configure show configure show disk_metadata configure show sysconfig configure show property -configure device="/dev/sdb1;/dev/sdb2" -configure device=/dev/sdb1 device=/dev/sdb2 -configure device=/dev/sdb1 watchdog-timeout=30 msgwait-timeout=60 -configure device="" watchdog-timeout=30 +configure watchdog-timeout=30 ............... -[[cmdhelp.sbd.status,show SBD status]] +[[cmdhelp.sbd.device,add or remove SBD device]] +==== `device` + +Add or remove SBD device(s) from the existing SBD configuration. + +example: +............... +device add /dev/sdb5 +device add /dev/sdb5 /dev/sdb6 +device add "/dev/sda5;/dev/sda6" +device remove /dev/sdb5 +............... + +[[cmdhelp.sbd.status,show SBD runtime status]] ==== `status` -Show the status of the SBD daemon. +Show the runtime status of the SBD daemon and +the other information of those SBD related components, +i.e. watchdog, fence agent. 
Usage: ............... status ............... -[[cmdhelp.sbd.remove,remove SBD configuration]] -==== `remove` +[[cmdhelp.sbd.purge,purge SBD from cluster]] +==== `purge` -Remove part of devices from the SBD configuration, or remove SBD -service from the cluster. +Disable the systemd sbd.service on all cluster nodes, +move the sbd sysconfig to .bak and adjust SBD related cluster properties. Usage: ............... -remove -remove [ ...] +purge ............... [[cmdhelp.node,Node management]] From e20f2b41da142389f5e009ff1c9d448486496a97 Mon Sep 17 00:00:00 2001 From: xin liang Date: Tue, 16 Jul 2024 20:12:27 +0800 Subject: [PATCH 27/36] Dev: behave: Add sbd_ui.feature to test the crm sbd UI --- data-manifest | 1 + test/features/sbd_ui.feature | 100 +++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 test/features/sbd_ui.feature diff --git a/data-manifest b/data-manifest index 7210be6b55..54ee89dd44 100644 --- a/data-manifest +++ b/data-manifest @@ -88,6 +88,7 @@ test/features/qdevice_usercase.feature test/features/qdevice_validate.feature test/features/resource_failcount.feature test/features/resource_set.feature +test/features/sbd_ui.feature test/features/ssh_agent.feature test/features/steps/behave_agent.py test/features/steps/const.py diff --git a/test/features/sbd_ui.feature b/test/features/sbd_ui.feature new file mode 100644 index 0000000000..23dd7eec21 --- /dev/null +++ b/test/features/sbd_ui.feature @@ -0,0 +1,100 @@ +@sbd +Feature: crm sbd ui test cases + + Tag @clean means need to stop cluster service if the service is available + + @clean + Scenario: Syntax check for crm sbd + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + Given Has disk "/dev/sda5" on "hanode1" + Given Has disk "/dev/sda6" on "hanode1" + Given Has disk "/dev/sda7" on "hanode1" + Given Has disk "/dev/sda8" on "hanode1" + Given Has disk "/dev/sda5" on "hanode2" + Given Has disk "/dev/sda6" on "hanode2" 
+ Given Has disk "/dev/sda7" on "hanode2" + Given Has disk "/dev/sda8" on "hanode2" + When Try "crm sbd configure watchdog-timeout=30" + Then Except "ERROR: pacemaker.service is not active" + When Run "crm cluster init -y" on "hanode1" + And Run "crm cluster join -c hanode1 -y" on "hanode2" + And Try "crm sbd configure watchdog-timeout=30" + Then Except "ERROR: sbd.service is not active" + When Run "crm cluster init sbd -s /dev/sda5 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "fence_sbd" is "Started" + + When Try "crm sbd configure show sysconfig xxx" + Then Except "ERROR: Invalid argument" + When Try "crm sbd configure show testing" + Then Except "ERROR: Unknown argument: testing" + When Try "crm sbd configure" + Then Except "ERROR: No argument" + When Try "crm sbd configure testing" + Then Except "ERROR: Invalid argument: testing" + When Try "crm sbd configure watchdog-timeout=f" + Then Except "ERROR: Invalid timeout value: f" + When Try "crm sbd configure name=testing" + Then Except "ERROR: Unknown argument: name=testing" + When Try "crm sbd device add /dev/sda6 /dev/sda6" + Then Expected "Duplicated device path detected" in stderr + When Try "crm sbd device add /dev/sda6 /dev/sda7 /dev/sda8" + Then Expected "Maximum number of SBD device is 3" in stderr + + Scenario: sbd configure for diskbased sbd + # Update disk metadata + When Run "crm sbd configure watchdog-timeout=30 msgwait-timeout=60" on "hanode1" + Then Run "crm sbd configure show disk_metadata|grep -E "watchdog.*30"" OK + Then Run "crm sbd configure show disk_metadata|grep -E "msgwait.*60"" OK + + Scenario: sbd device add and remove + # Add a sbd disk + Given Run "crm sbd configure show sysconfig|grep "SBD_DEVICE=/dev/sda5"" OK + When Run "crm -F sbd device add /dev/sda6" on "hanode1" + Then Run "crm sbd configure show sysconfig|grep -E 
"SBD_DEVICE=\"/dev/sda5;/dev/sda6\""" OK + Then Run "crm sbd configure show sysconfig|grep -E "SBD_DEVICE=\"/dev/sda5;/dev/sda6\""" OK on "hanode2" + And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda6'|grep -E "watchdog.*30"" OK + And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda6'|grep -E "msgwait.*60"" OK + When Run "crm cluster restart --all" on "hanode1" + And Wait for DC + # Remove a sbd disk + When Run "crm sbd device remove /dev/sda5" on "hanode1" + Then Run "crm sbd configure show sysconfig|grep "SBD_DEVICE=/dev/sda6"" OK + Then Run "crm sbd configure show sysconfig|grep "SBD_DEVICE=/dev/sda6"" OK on "hanode2" + When Run "crm cluster restart --all" on "hanode1" + And Wait for DC + # Replace a sbd disk + When Run "crm -F sbd device add /dev/sda7" on "hanode1" + Then Run "crm sbd configure show sysconfig|grep -E "SBD_DEVICE=\"/dev/sda6;/dev/sda7\""" OK + Then Run "crm sbd configure show sysconfig|grep -E "SBD_DEVICE=\"/dev/sda6;/dev/sda7\""" OK on "hanode2" + And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda7'|grep -E "watchdog.*30"" OK + And Run "crm sbd configure show disk_metadata |grep -A 8 '/dev/sda7'|grep -E "msgwait.*60"" OK + When Run "crm cluster restart --all" on "hanode1" + And Wait for DC + # Purge sbd from cluster + When Run "crm sbd purge" on "hanode1" + And Run "crm cluster restart --all" on "hanode1" + Then Service "sbd.service" is "stopped" on "hanode1" + Then Service "sbd.service" is "stopped" on "hanode2" + + @clean + Scenario: sbd configure for diskless sbd + # Newly setup + When Run "crm cluster init -S -y" on "hanode1" + And Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode1" + Then Cluster service is "started" on "hanode2" + Then Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith:fence_sbd" not configured + # Should not have any sbd device configured + When Try "crm sbd configure show 
sysconfig|grep -E "SBD_DEVICE=.+"" + Then Expected return code is "1" + # Purge sbd from cluster + When Run "crm sbd purge" on "hanode1" + And Run "crm cluster restart --all" on "hanode1" + Then Service "sbd.service" is "stopped" on "hanode1" + Then Service "sbd.service" is "stopped" on "hanode2" From 11328a7a90295998512da0520f3856ee6ab53449 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 15 Nov 2024 09:44:29 +0800 Subject: [PATCH 28/36] Dev: sbd: Split get_sbd_device_interactive into smaller functions --- crmsh/bootstrap.py | 2 +- crmsh/sbd.py | 62 +++++++++++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 453cf1cd6d..890c603ac7 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -1400,7 +1400,7 @@ def init_sbd(): """ import crmsh.sbd if _context.stage == "sbd": - crmsh.sbd.clean_up_existing_sbd_resource() + crmsh.sbd.cleanup_existing_sbd_resource() _context.sbd_manager.init_and_deploy_sbd() diff --git a/crmsh/sbd.py b/crmsh/sbd.py index 9402ea4d9f..37bfa40327 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -572,36 +572,30 @@ def _warn_diskless_sbd(self, peer=None): elif self.diskless_sbd: logger.warning('%s', self.DISKLESS_SBD_WARNING) - def get_sbd_device_interactive(self): + def _warn_and_raise_no_sbd(self): + logger.warning('%s', self.NO_SBD_WARNING) + raise self.NotConfigSBD + + def _wants_to_overwrite(self, configured_devices): + wants_to_overwrite_msg = f"SBD_DEVICE in {self.SYSCONFIG_SBD} is already configured to use '{';'.join(configured_devices)}' - overwrite?" 
+ if not bootstrap.confirm(wants_to_overwrite_msg): + if not SBDUtils.check_devices_metadata_consistent(configured_devices): + raise utils.TerminateSubCommand + self.overwrite_sysconfig = False + return False + return True + + def _prompt_for_sbd_device(self) -> list[str]: ''' - Get sbd device on interactive mode + Prompt for sbd device and verify ''' - if self.bootstrap_context.yes_to_all: - logger.warning('%s', self.NO_SBD_WARNING) - raise self.NotConfigSBD - logger.info(self.SBD_STATUS_DESCRIPTION) - if not bootstrap.confirm("Do you wish to use SBD?"): - logger.warning('%s', self.NO_SBD_WARNING) - raise self.NotConfigSBD - if not utils.package_is_installed("sbd"): - utils.fatal(self.SBD_NOT_INSTALLED_MSG) - - configured_devices = SBDUtils.get_sbd_device_from_config() - if configured_devices: - wants_to_overwrite_msg = f"SBD_DEVICE in {self.SYSCONFIG_SBD} is already configured to use '{';'.join(configured_devices)}' - overwrite?" - if not bootstrap.confirm(wants_to_overwrite_msg): - if not SBDUtils.check_devices_metadata_consistent(configured_devices): - raise utils.TerminateSubCommand - self.overwrite_sysconfig = False - return - dev_list = [] dev_looks_sane = False while not dev_looks_sane: dev = bootstrap.prompt_for_string('Path to storage device (e.g. 
/dev/disk/by-id/...), or "none" for diskless sbd, use ";" as separator for multi path', r'none|\/.*') if dev == "none": self.diskless_sbd = True - return + return [] dev_list = utils.re_split_string("[; ]", dev) try: @@ -620,9 +614,27 @@ def get_sbd_device_interactive(self): else: dev_looks_sane = False break - return dev_list + def get_sbd_device_interactive(self) -> list[str]: + ''' + Get sbd device on interactive mode + ''' + if self.bootstrap_context.yes_to_all: + self._warn_and_raise_no_sbd() + logger.info(self.SBD_STATUS_DESCRIPTION) + if not bootstrap.confirm("Do you wish to use SBD?"): + self._warn_and_raise_no_sbd() + if not utils.package_is_installed("sbd"): + utils.fatal(self.SBD_NOT_INSTALLED_MSG) + + configured_devices = SBDUtils.get_sbd_device_from_config() + # return empty list if already configured and user doesn't want to overwrite + if configured_devices and not self._wants_to_overwrite(configured_devices): + return [] + + return self._prompt_for_sbd_device() + def get_sbd_device_from_bootstrap(self): ''' Handle sbd device input from 'crm cluster init' with -s or -S option @@ -696,7 +708,7 @@ def join_sbd(self, remote_user, peer_host): service_manager.enable_service(constants.SBD_SERVICE) -def clean_up_existing_sbd_resource(): +def cleanup_existing_sbd_resource(): if xmlutil.CrmMonXmlParser().is_resource_configured(SBDManager.SBD_RA): sbd_id_list = xmlutil.CrmMonXmlParser().get_resource_id_list_via_type(SBDManager.SBD_RA) if xmlutil.CrmMonXmlParser().is_resource_started(SBDManager.SBD_RA): @@ -716,7 +728,7 @@ def purge_sbd_from_cluster(): - adjust cluster attributes - adjust related timeout values ''' - clean_up_existing_sbd_resource() + cleanup_existing_sbd_resource() cluster_nodes = utils.list_cluster_nodes() service_manager = ServiceManager() From 3fc5da08df7c973d7a92f19609f2e8129b098414 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 29 Nov 2024 09:38:50 +0800 Subject: [PATCH 29/36] Dev: ui_sbd: Print sbd cmdline content in `sbd status` 
command --- crmsh/ui_sbd.py | 24 ++++++++++++++++++++++++ crmsh/utils.py | 7 +++++++ 2 files changed, 31 insertions(+) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index d8b24b6bc4..ff6c73f2cc 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -515,12 +515,36 @@ def _print_sbd_agent_status(self): if output: print(output) + def _print_sbd_cgroup_status(self): + scripts_in_shell = '''#!/bin/bash +cgroup_procs_file="/sys/fs/cgroup/system.slice/sbd.service/cgroup.procs" +if [ ! -f "$cgroup_procs_file" ]; then + exit +fi +pids=$(cat "$cgroup_procs_file") +for pid in $pids; do + cmdline_file="/proc/$pid/cmdline" + if [ -f "$cmdline_file" ]; then + cmdline=$(tr '\0' ' ' < "$cmdline_file") + if [[ "$cmdline" == *"slot:"* ]]; then + echo "├─$pid \"$cmdline\"" + fi + fi +done + ''' + for node in self.cluster_nodes: + out = self.cluster_shell.get_stdout_or_raise_error(scripts_in_shell, node) + if out: + print(f"# Status of the sbd disk watcher process on {node}:") + print(out + "\n") + def do_status(self, context) -> bool: ''' Implement sbd status command ''' self._print_sbd_type() self._print_sbd_status() + self._print_sbd_cgroup_status() self._print_watchdog_info() self._print_sbd_agent_status() return True diff --git a/crmsh/utils.py b/crmsh/utils.py index 4d6940e1dc..eb598f70c5 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -3213,4 +3213,11 @@ def strip_ansi_escape_sequences(text): """ ansi_escape_pattern = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') return ansi_escape_pattern.sub('', text) + + +def is_subdict(sub_dict, main_dict): + """ + Check if sub_dict is a sub-dictionary of main_dict + """ + return all(main_dict.get(k) == v for k, v in sub_dict.items()) # vim:ts=4:sw=4:et: From b19f96079d46e1b130d09cbc954130f3b9a120bf Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 29 Nov 2024 13:03:09 +0800 Subject: [PATCH 30/36] Dev: ui_sbd: Adjust sbd configure subcommand - Return immediately if no changes are made - Adjust watchdog timeout and msgwait values properly 
--- crmsh/ui_sbd.py | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index ff6c73f2cc..ebba1dd9d3 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -269,26 +269,21 @@ def _parse_args(self, args: tuple[str, ...]) -> dict[str, int|str]: return parameter_dict @staticmethod - def _adjust_timeout_dict(timeout_dict: dict, diskless: bool = False) -> dict: + def _adjust_timeout_dict(timeout_dict: dict) -> dict: watchdog_timeout = timeout_dict.get("watchdog") - if not watchdog_timeout: - watchdog_timeout, _ = sbd.SBDTimeout.get_advised_sbd_timeout(diskless) - logger.info("No watchdog timeout specified, use advised value: %s", watchdog_timeout) - timeout_dict["watchdog"] = watchdog_timeout - - if diskless: - return timeout_dict - msgwait_timeout = timeout_dict.get("msgwait") - if not msgwait_timeout: - msgwait_timeout = 2*watchdog_timeout - logger.info("No msgwait timeout specified, use 2*watchdog timeout: %s", msgwait_timeout) - timeout_dict["msgwait"] = msgwait_timeout - - if msgwait_timeout < 2*watchdog_timeout: + if watchdog_timeout and msgwait_timeout and msgwait_timeout < 2*watchdog_timeout: logger.warning("It's recommended to set msgwait timeout >= 2*watchdog timeout") - - return timeout_dict + return timeout_dict + if watchdog_timeout and not msgwait_timeout: + timeout_dict["msgwait"] = 2*watchdog_timeout + logger.info("No msgwait timeout specified, use 2*watchdog timeout: %s", 2*watchdog_timeout) + return timeout_dict + if msgwait_timeout and not watchdog_timeout: + watchdog_timeout = msgwait_timeout//2 + timeout_dict["watchdog"] = watchdog_timeout + logger.info("No watchdog timeout specified, use msgwait timeout/2: %s", watchdog_timeout) + return timeout_dict def _configure_diskbase(self, parameter_dict: dict): ''' @@ -299,10 +294,17 @@ def _configure_diskbase(self, parameter_dict: dict): if watchdog_device != self.watchdog_device_from_config: 
update_dict["SBD_WATCHDOG_DEV"] = watchdog_device timeout_dict = {k: v for k, v in parameter_dict.items() if k in self.TIMEOUT_TYPES} - # merge runtime timeout dict with new timeout dict - timeout_dict = self.device_meta_dict_runtime | timeout_dict - # adjust watchdog and msgwait timeout - timeout_dict = self._adjust_timeout_dict(timeout_dict) + is_subdict_timeout = utils.is_subdict(timeout_dict, self.device_meta_dict_runtime) + + if is_subdict_timeout and not update_dict: + logger.info("No change in SBD configuration") + return + + if not is_subdict_timeout: + timeout_dict = self._adjust_timeout_dict(timeout_dict) + # merge runtime timeout dict into parameter timeout dict without overwriting + timeout_dict = {**self.device_meta_dict_runtime, **timeout_dict} + watchdog_timeout = timeout_dict.get("watchdog") if watchdog_timeout != self.watchdog_timeout_from_config: update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) @@ -319,13 +321,15 @@ def _configure_diskless(self, parameter_dict: dict): Configure diskless SBD based on input parameters and runtime config ''' update_dict = {} - parameter_dict = self._adjust_timeout_dict(parameter_dict, diskless=True) watchdog_timeout = parameter_dict.get("watchdog") if watchdog_timeout and watchdog_timeout != self.watchdog_timeout_from_config: update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) watchdog_device = parameter_dict.get("watchdog-device") if watchdog_device != self.watchdog_device_from_config: update_dict["SBD_WATCHDOG_DEV"] = watchdog_device + if not update_dict: + logger.info("No change in SBD configuration") + return sbd_manager = sbd.SBDManager( update_dict=update_dict, From 4998bbc4feaacfd07de2e4c42bcfb88968b386d0 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 29 Nov 2024 21:05:44 +0800 Subject: [PATCH 31/36] Dev: ui_sbd: Adjust output of `sbd status` --- crmsh/ui_sbd.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/crmsh/ui_sbd.py 
b/crmsh/ui_sbd.py index ebba1dd9d3..4e9934e5b6 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -116,7 +116,6 @@ def __init__(self): self.service_manager: ServiceManager = None self.cluster_shell: sh.cluster_shell = None self.cluster_nodes: list[str] = None - self.crm_mon_xml_parser: xmlutil.CrmMonXmlParser = None self._load_attributes() command.UI.__init__(self) @@ -141,7 +140,6 @@ def _load_attributes(self): except Exception as e: logger.error(e) self.cluster_nodes.remove(node) - self.crm_mon_xml_parser = xmlutil.CrmMonXmlParser() def requires(self) -> bool: ''' @@ -486,38 +484,37 @@ def _print_watchdog_info(self): watchdog_sbd_re = r"\[[0-9]+\] (/dev/.*)\nIdentity: Busy: .*sbd.*\nDriver: (.*)" device_list, driver_list, kernel_timeout_list = [], [], [] - cluster_nodes = self.cluster_nodes[:] - for node in cluster_nodes[:]: + for node in self.cluster_nodes: out = self.cluster_shell.get_stdout_or_raise_error("sbd query-watchdog", node) res = re.search(watchdog_sbd_re, out) if res: device, driver = res.groups() kernel_timeout = self.cluster_shell.get_stdout_or_raise_error("cat /proc/sys/kernel/watchdog_thresh", node) - device_list.append(device) - driver_list.append(driver) - kernel_timeout_list.append(kernel_timeout) else: - logger.error("Failed to get watchdog info from %s", node) - cluster_nodes.remove(node) - if not cluster_nodes: - return + device, driver, kernel_timeout = "N/A", "N/A", "N/A" + device_list.append(device) + driver_list.append(driver) + kernel_timeout_list.append(kernel_timeout) print("# Watchdog info:") - max_dev_len = max(len(dev) for dev in device_list) + padding - max_driver_len = max(len(driver) for driver in driver_list) + padding + max_dev_len = max(len(dev) for dev in device_list+["Device"]) + padding + max_driver_len = max(len(driver) for driver in driver_list+["Driver"]) + padding print(f"{'Node':<{max_node_len}}|{'Device':<{max_dev_len}}|{'Driver':<{max_driver_len}}|Kernel Timeout") - for i, node in enumerate(cluster_nodes): + for 
i, node in enumerate(self.cluster_nodes): print(f"{node:<{max_node_len}}|{device_list[i]:<{max_dev_len}}|{driver_list[i]:<{max_driver_len}}|{kernel_timeout_list[i]}") print() def _print_sbd_agent_status(self): - if self.crm_mon_xml_parser.is_resource_configured(sbd.SBDManager.SBD_RA): - print("# Status of fence_sbd:") - sbd_id_list = self.crm_mon_xml_parser.get_resource_id_list_via_type(sbd.SBDManager.SBD_RA) - for sbd_id in sbd_id_list: - rc, output = self.cluster_shell.get_rc_output_without_input(None, f"crm resource status {sbd_id}") - if output: - print(output) + for node in self.cluster_nodes: + crm_mon_xml_parser = xmlutil.CrmMonXmlParser(node) + if crm_mon_xml_parser.is_resource_configured(sbd.SBDManager.SBD_RA): + print("# Status of fence_sbd:") + sbd_id_list = crm_mon_xml_parser.get_resource_id_list_via_type(sbd.SBDManager.SBD_RA) + for sbd_id in sbd_id_list: + rc, output = self.cluster_shell.get_rc_output_without_input(node, f"crm resource status {sbd_id}") + if output: + print(output) + return def _print_sbd_cgroup_status(self): scripts_in_shell = '''#!/bin/bash From 494416dc4dea3a9b2723ad7150b54641fde9718d Mon Sep 17 00:00:00 2001 From: xin liang Date: Mon, 2 Dec 2024 11:10:31 +0800 Subject: [PATCH 32/36] Dev: Don't set and show SBD_WATCHDOG_TIMEOUT for disk-based SBD --- crmsh/sbd.py | 5 +++-- crmsh/ui_sbd.py | 13 +++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index 37bfa40327..a0ce89efbe 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -472,9 +472,10 @@ def _load_attributes_from_bootstrap(self): timeout_inst = SBDTimeout(self.bootstrap_context) timeout_inst.initialize_timeout() self.timeout_dict["watchdog"] = timeout_inst.sbd_watchdog_timeout - if not self.diskless_sbd: + if self.diskless_sbd: + self.update_dict["SBD_WATCHDOG_TIMEOUT"] = str(timeout_inst.sbd_watchdog_timeout) + else: self.timeout_dict["msgwait"] = timeout_inst.sbd_msgwait - self.update_dict["SBD_WATCHDOG_TIMEOUT"] = 
str(timeout_inst.sbd_watchdog_timeout) self.update_dict["SBD_WATCHDOG_DEV"] = watchdog.Watchdog.get_watchdog_device(self.bootstrap_context.watchdog) @staticmethod diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index 4e9934e5b6..bca41ae927 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -174,8 +174,7 @@ def configure_usage(self) -> str: show_usage = f"crm sbd configure show [{'|'.join(show_types)}]" return f"Usage:\n{show_usage}\ncrm sbd configure {timeout_usage_str} [watchdog-device=]\n" - @staticmethod - def _show_sysconfig() -> None: + def _show_sysconfig(self) -> None: ''' Show pure content of /etc/sysconfig/sbd ''' @@ -188,6 +187,8 @@ def _show_sysconfig() -> None: if content_list: logger.info("crm sbd configure show sysconfig") for line in content_list: + if line.startswith("SBD_WATCHDOG_TIMEOUT") and bool(self.device_list_from_config): + continue print(line) def _show_disk_metadata(self) -> None: @@ -225,7 +226,7 @@ def _configure_show(self, args) -> None: case "disk_metadata": self._show_disk_metadata() case "sysconfig": - SBD._show_sysconfig() + self._show_sysconfig() case "property": self._show_property() case _: @@ -234,7 +235,7 @@ def _configure_show(self, args) -> None: self._show_disk_metadata() if self.device_list_from_config: print() - SBD._show_sysconfig() + self._show_sysconfig() print() self._show_property() @@ -303,10 +304,6 @@ def _configure_diskbase(self, parameter_dict: dict): # merge runtime timeout dict into parameter timeout dict without overwriting timeout_dict = {**self.device_meta_dict_runtime, **timeout_dict} - watchdog_timeout = timeout_dict.get("watchdog") - if watchdog_timeout != self.watchdog_timeout_from_config: - update_dict["SBD_WATCHDOG_TIMEOUT"] = str(watchdog_timeout) - sbd_manager = sbd.SBDManager( device_list_to_init=self.device_list_from_config, timeout_dict=timeout_dict, From f98154fb906c1f90948c057115da3688cf771d83 Mon Sep 17 00:00:00 2001 From: xin liang Date: Mon, 2 Dec 2024 22:13:08 +0800 Subject: [PATCH 33/36] 
Dev: sbd: Remove sbd delay start related directories when running sbd purge Remove /etc/systemd/system/sbd.service.d Remove /run/systemd/system/sbd.service.d --- crmsh/bootstrap.py | 6 ++---- crmsh/sbd.py | 7 +++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 890c603ac7..6fef5c1303 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -775,10 +775,8 @@ def start_pacemaker(node_list=[], enable_flag=False): utils.package_is_installed("sbd") and \ ServiceManager().service_is_enabled(constants.SBD_SERVICE) and \ SBDTimeout.is_sbd_delay_start(): - target_dir = "/run/systemd/system/sbd.service.d/" - cmd1 = "mkdir -p {}".format(target_dir) - target_file = "{}sbd_delay_start_disabled.conf".format(target_dir) - cmd2 = "echo -e '[Service]\nUnsetEnvironment=SBD_DELAY_START' > {}".format(target_file) + cmd1 = f"mkdir -p {SBDManager.SBD_SYSTEMD_DELAY_START_DISABLE_DIR}" + cmd2 = f"echo -e '[Service]\nUnsetEnvironment=SBD_DELAY_START' > {SBDManager.SBD_SYSTEMD_DELAY_START_DISABLE_FILE}" cmd3 = "systemctl daemon-reload" for cmd in [cmd1, cmd2, cmd3]: parallax.parallax_call(node_list, cmd) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index a0ce89efbe..f4814a340b 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -8,6 +8,7 @@ from . import corosync from . import xmlutil from . import watchdog +from .
import parallax from .service_manager import ServiceManager from .sh import ShellUtils @@ -420,6 +421,8 @@ class SBDManager: SYSCONFIG_SBD = "/etc/sysconfig/sbd" SYSCONFIG_SBD_TEMPLATE = "/usr/share/fillup-templates/sysconfig.sbd" SBD_SYSTEMD_DELAY_START_DIR = "/etc/systemd/system/sbd.service.d" + SBD_SYSTEMD_DELAY_START_DISABLE_DIR = "/run/systemd/system/sbd.service.d" + SBD_SYSTEMD_DELAY_START_DISABLE_FILE = f"{SBD_SYSTEMD_DELAY_START_DISABLE_DIR}/sbd_delay_start_disabled.conf" SBD_STATUS_DESCRIPTION = '''Configure SBD: If you have shared storage, for example a SAN or iSCSI target, you can use it avoid split-brain scenarios by configuring SBD. @@ -747,3 +750,7 @@ def purge_sbd_from_cluster(): # after disable sbd.service, check if sbd is the last stonith device if res and int(res.group(1)) <= 1: utils.cleanup_stonith_related_properties() + + for _dir in [SBDManager.SBD_SYSTEMD_DELAY_START_DIR, SBDManager.SBD_SYSTEMD_DELAY_START_DISABLE_DIR]: + cmd = f"test -d {_dir} && rm -rf {_dir} || exit 0" + parallax.parallax_call(cluster_nodes, cmd) From b9258af79718cbdcafa4297481f5c22cb4713865 Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 4 Dec 2024 11:18:05 +0800 Subject: [PATCH 34/36] Dev: sbd: Delete stonith-watchdog-timeout property when configuring disk-based SBD --- crmsh/sbd.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/crmsh/sbd.py b/crmsh/sbd.py index f4814a340b..a702fc99c9 100644 --- a/crmsh/sbd.py +++ b/crmsh/sbd.py @@ -555,11 +555,14 @@ def configure_sbd(self): ''' if self.diskless_sbd: utils.set_property("stonith-watchdog-timeout", SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT) - elif not xmlutil.CrmMonXmlParser().is_resource_configured(self.SBD_RA): - all_device_list = SBDUtils.get_sbd_device_from_config() - devices_param_str = f"params devices=\"{','.join(all_device_list)}\"" - cmd = f"crm configure primitive {self.SBD_RA_ID} {self.SBD_RA} {devices_param_str}" - sh.cluster_shell().get_stdout_or_raise_error(cmd) + else: 
+ if utils.get_property("stonith-watchdog-timeout", get_default=False): + utils.delete_property("stonith-watchdog-timeout") + if not xmlutil.CrmMonXmlParser().is_resource_configured(self.SBD_RA): + all_device_list = SBDUtils.get_sbd_device_from_config() + devices_param_str = f"params devices=\"{','.join(all_device_list)}\"" + cmd = f"crm configure primitive {self.SBD_RA_ID} {self.SBD_RA} {devices_param_str}" + sh.cluster_shell().get_stdout_or_raise_error(cmd) utils.set_property("stonith-enabled", "true") def _warn_diskless_sbd(self, peer=None): From a97986aa784dd8c252a5cadc7839429fb95b2b41 Mon Sep 17 00:00:00 2001 From: xin liang Date: Wed, 4 Dec 2024 12:41:59 +0800 Subject: [PATCH 35/36] Dev: ui_sbd: Don't show stonith-watchdog-timeout for disk-based SBD --- crmsh/ui_sbd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crmsh/ui_sbd.py b/crmsh/ui_sbd.py index bca41ae927..1b3b45dbe8 100644 --- a/crmsh/ui_sbd.py +++ b/crmsh/ui_sbd.py @@ -95,11 +95,11 @@ class SBD(command.UI): PCMK_ATTRS = ( "have-watchdog", "stonith-timeout", - "stonith-watchdog-timeout", "stonith-enabled", "priority-fencing-delay", "pcmk_delay_max" ) + PCMK_ATTRS_DISKLESS = ('stonith-watchdog-timeout',) PARSE_RE = re.compile( # Match keys with non-empty values, capturing possible suffix r'(\w+)(?:-(\w+))?=("[^"]+"|[\w/\d;]+)' @@ -208,7 +208,11 @@ def _show_property(self) -> None: out = self.cluster_shell.get_stdout_or_raise_error("crm configure show") logger.info("crm sbd configure show property") - regex = f"({'|'.join(self.PCMK_ATTRS)})=(\\S+)" + if self.device_list_from_config: + attrs = self.PCMK_ATTRS + else: + attrs = self.PCMK_ATTRS + self.PCMK_ATTRS_DISKLESS + regex = f"({'|'.join(attrs)})=(\\S+)" matches = re.findall(regex, out) for match in matches: print(f"{match[0]}={match[1]}") From d04573ddbb116ccc4c2db2ef7e0218ddba7521c9 Mon Sep 17 00:00:00 2001 From: xin liang Date: Fri, 20 Sep 2024 10:52:32 +0800 Subject: [PATCH 36/36] Dev: unittests: Adjust unit test for 
previous commits --- data-manifest | 1 + test/unittests/test_bootstrap.py | 28 +- test/unittests/test_qdevice.py | 6 +- test/unittests/test_report_collect.py | 8 +- test/unittests/test_sbd.py | 1629 ++++++++++++------------- test/unittests/test_ui_sbd.py | 580 +++++++++ test/unittests/test_utils.py | 8 +- test/unittests/test_watchdog.py | 64 +- 8 files changed, 1406 insertions(+), 918 deletions(-) create mode 100644 test/unittests/test_ui_sbd.py diff --git a/data-manifest b/data-manifest index 54ee89dd44..39ea66fea8 100644 --- a/data-manifest +++ b/data-manifest @@ -213,6 +213,7 @@ test/unittests/test_sh.py test/unittests/test_time.py test/unittests/test_ui_cluster.py test/unittests/test_ui_corosync.py +test/unittests/test_ui_sbd.py test/unittests/test_upgradeuitl.py test/unittests/test_utils.py test/unittests/test_watchdog.py diff --git a/test/unittests/test_bootstrap.py b/test/unittests/test_bootstrap.py index fbd032a91f..4f6cd49a3d 100644 --- a/test/unittests/test_bootstrap.py +++ b/test/unittests/test_bootstrap.py @@ -29,6 +29,7 @@ from crmsh import bootstrap from crmsh import constants from crmsh import qdevice +from crmsh import sbd class TestContext(unittest.TestCase): @@ -126,8 +127,10 @@ def test_initialize_qdevice_with_user(self, mock_qdevice): ctx.initialize_qdevice() mock_qdevice.assert_called_once_with(qnetd_addr='node3', port=123, ssh_user='alice', algo=None, tie_breaker=None, tls=None, cmds=None, mode=None, is_stage=False) + @mock.patch('crmsh.utils.package_is_installed') @mock.patch('crmsh.utils.fatal') - def test_validate_sbd_option_error_together(self, mock_error): + def test_validate_sbd_option_error_together(self, mock_error, mock_installed): + mock_installed.return_value = True mock_error.side_effect = SystemExit ctx = crmsh.bootstrap.Context() ctx.sbd_devices = ["/dev/sda1"] @@ -136,8 +139,10 @@ def test_validate_sbd_option_error_together(self, mock_error): ctx._validate_sbd_option() mock_error.assert_called_once_with("Can't use -s and -S 
options together") + @mock.patch('crmsh.utils.package_is_installed') @mock.patch('crmsh.utils.fatal') - def test_validate_sbd_option_error_sbd_stage_no_option(self, mock_error): + def test_validate_sbd_option_error_sbd_stage_no_option(self, mock_error, mock_installed): + mock_installed.return_value = True mock_error.side_effect = SystemExit ctx = crmsh.bootstrap.Context() ctx.stage = "sbd" @@ -146,9 +151,11 @@ def test_validate_sbd_option_error_sbd_stage_no_option(self, mock_error): ctx._validate_sbd_option() mock_error.assert_called_once_with("Stage sbd should specify sbd device by -s or diskless sbd by -S option") + @mock.patch('crmsh.utils.package_is_installed') @mock.patch('crmsh.utils.fatal') @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - def test_validate_sbd_option_error_sbd_stage_service(self, mock_active, mock_error): + def test_validate_sbd_option_error_sbd_stage_service(self, mock_active, mock_error, mock_installed): + mock_installed.return_value = True mock_error.side_effect = SystemExit ctx = crmsh.bootstrap.Context() ctx.stage = "sbd" @@ -159,10 +166,12 @@ def test_validate_sbd_option_error_sbd_stage_service(self, mock_active, mock_err mock_error.assert_called_once_with("Can't configure stage sbd: sbd.service already running! 
Please use crm option '-F' if need to redeploy") mock_active.assert_called_once_with("sbd.service") + @mock.patch('crmsh.utils.package_is_installed') @mock.patch('crmsh.utils.check_all_nodes_reachable') @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - def test_validate_sbd_option_error_sbd_stage(self, mock_active, mock_check_all): + def test_validate_sbd_option_error_sbd_stage(self, mock_active, mock_check_all, mock_installed): options = mock.Mock(stage="sbd", diskless_sbd=True, cluster_is_running=True) + mock_installed.return_value = True ctx = crmsh.bootstrap.Context() ctx.stage = "sbd" ctx.diskless_sbd = True @@ -465,8 +474,8 @@ def test_start_pacemaker(self, mock_installed, mock_enabled, mock_delay_start, m mock.call("pacemaker.service", enable=False, node_list=node_list) ]) mock_parallax_call.assert_has_calls([ - mock.call(node_list, 'mkdir -p /run/systemd/system/sbd.service.d/'), - mock.call(node_list, "echo -e '[Service]\nUnsetEnvironment=SBD_DELAY_START' > /run/systemd/system/sbd.service.d/sbd_delay_start_disabled.conf"), + mock.call(node_list, f'mkdir -p {sbd.SBDManager.SBD_SYSTEMD_DELAY_START_DISABLE_DIR}'), + mock.call(node_list, f"echo -e '[Service]\nUnsetEnvironment=SBD_DELAY_START' > {sbd.SBDManager.SBD_SYSTEMD_DELAY_START_DISABLE_FILE}"), mock.call(node_list, "systemctl daemon-reload"), ]) @@ -1336,13 +1345,12 @@ def test_adjust_pcmk_delay(self, mock_cib_factory, mock_run, mock_debug): bootstrap.adjust_pcmk_delay_max(False) mock_run.assert_called_once_with("crm resource param res_1 delete pcmk_delay_max") - @mock.patch('crmsh.sbd.SBDTimeout') + @mock.patch('crmsh.sbd.SBDTimeout.adjust_sbd_timeout_related_cluster_configuration') @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - def test_adjust_stonith_timeout_sbd(self, mock_is_active, mock_sbd_timeout): + def test_adjust_stonith_timeout_sbd(self, mock_is_active, mock_sbd_adjust_timeout): mock_is_active.return_value = True - 
mock_sbd_timeout.adjust_sbd_timeout_related_cluster_configuration = mock.Mock() bootstrap.adjust_stonith_timeout() - mock_sbd_timeout.adjust_sbd_timeout_related_cluster_configuration.assert_called_once_with() + mock_sbd_adjust_timeout.assert_called_once_with() @mock.patch('crmsh.utils.set_property') @mock.patch('crmsh.bootstrap.get_stonith_timeout_generally_expected') diff --git a/test/unittests/test_qdevice.py b/test/unittests/test_qdevice.py index 59bb297949..246ee2a86d 100644 --- a/test/unittests/test_qdevice.py +++ b/test/unittests/test_qdevice.py @@ -804,9 +804,9 @@ def test_config_and_start_qdevice(self, mock_rm_db, mock_status_long, mock_evalu @mock.patch('crmsh.utils.set_property') @mock.patch('crmsh.sbd.SBDTimeout.get_stonith_timeout') - @mock.patch('crmsh.sbd.SBDManager.update_configuration') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - @mock.patch('crmsh.sbd.SBDManager.is_using_diskless_sbd') + @mock.patch('crmsh.sbd.SBDManager.update_sbd_configuration') + @mock.patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + @mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') @mock.patch('crmsh.utils.check_all_nodes_reachable') def test_adjust_sbd_watchdog_timeout_with_qdevice(self, mock_check_reachable, mock_using_diskless_sbd, mock_get_sbd_value, mock_update_config, mock_get_timeout, mock_set_property): mock_using_diskless_sbd.return_value = True diff --git a/test/unittests/test_report_collect.py b/test/unittests/test_report_collect.py index a005a46331..9f663b086e 100644 --- a/test/unittests/test_report_collect.py +++ b/test/unittests/test_report_collect.py @@ -187,7 +187,7 @@ def test_collect_sbd_info(self, mock_exists, mock_copy, mock_which, mock_run, mo mock_open_write = mock.mock_open() file_handle = mock_open_write.return_value.__enter__.return_value mock_open_file.return_value = mock_open_write.return_value - mock_run.return_value = "data" + mock_run.side_effect = ["data", "data", "data"] mock_ctx_inst = 
mock.Mock(work_dir="/opt") collect.collect_sbd_info(mock_ctx_inst) @@ -199,6 +199,12 @@ def test_collect_sbd_info(self, mock_exists, mock_copy, mock_which, mock_run, mo file_handle.write.assert_has_calls([ mock.call("\n\n#=====[ Command ] ==========================#\n"), mock.call("# . /etc/sysconfig/sbd;export SBD_DEVICE;sbd dump;sbd list\n"), + mock.call("data"), + mock.call("\n\n#=====[ Command ] ==========================#\n"), + mock.call("# crm sbd configure show\n"), + mock.call("data"), + mock.call("\n\n#=====[ Command ] ==========================#\n"), + mock.call("# crm sbd status\n"), mock.call("data") ]) mock_debug.assert_called_once_with(f"Dump SBD config file into {constants.SBD_F}") diff --git a/test/unittests/test_sbd.py b/test/unittests/test_sbd.py index e8f0f259bc..f73a8734f6 100644 --- a/test/unittests/test_sbd.py +++ b/test/unittests/test_sbd.py @@ -1,895 +1,780 @@ -import os -import unittest import logging - -try: - from unittest import mock -except ImportError: - import mock - -from crmsh import bootstrap +import unittest +from unittest.mock import patch, MagicMock, call, Mock +from crmsh.sbd import SBDUtils, SBDManager +from crmsh import utils from crmsh import sbd +from crmsh import constants -class TestSBDTimeout(unittest.TestCase): - """ - Unitary tests for crmsh.sbd.SBDTimeout - """ - - @classmethod - def setUpClass(cls): - """ - Global setUp. - """ +class TestSBDUtils(unittest.TestCase): - def setUp(self): - """ - Test setUp. - """ - _dict = {"sbd.watchdog_timeout": 5, "sbd.msgwait": 10} - _inst_q = mock.Mock() - self.sbd_timeout_inst = sbd.SBDTimeout(mock.Mock(profiles_dict=_dict, is_s390=True, qdevice_inst=_inst_q)) + TEST_DATA = """ + UUID : 1234-5678 + Timeout (watchdog) : 5 + Timeout (msgwait) : 10 + """ - def tearDown(self): - """ - Test tearDown. 
- """ + @patch('crmsh.sh.cluster_shell') + def test_get_sbd_device_metadata_success(self, mock_cluster_shell): + mock_cluster_shell.return_value.get_stdout_or_raise_error.return_value = self.TEST_DATA + result = SBDUtils.get_sbd_device_metadata("/dev/sbd_device") + expected = {'uuid': '1234-5678', 'watchdog': 5, 'msgwait': 10} + self.assertEqual(result, expected) + + @patch('crmsh.sh.cluster_shell') + def test_get_sbd_device_metadata_exception(self, mock_cluster_shell): + mock_cluster_shell.return_value.get_stdout_or_raise_error.side_effect = Exception + result = SBDUtils.get_sbd_device_metadata("/dev/sbd_device") + self.assertEqual(result, {}) + + @patch('crmsh.sh.cluster_shell') + def test_get_sbd_device_metadata_timeout_only(self, mock_cluster_shell): + mock_cluster_shell.return_value.get_stdout_or_raise_error.return_value = self.TEST_DATA + result = SBDUtils.get_sbd_device_metadata("/dev/sbd_device", timeout_only=True) + expected = {'watchdog': 5, 'msgwait': 10} + self.assertNotIn('uuid', result) + self.assertEqual(result, expected) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + def test_get_device_uuid_success(self, mock_get_sbd_device_metadata): + mock_get_sbd_device_metadata.return_value = {'uuid': '1234-5678'} + result = SBDUtils.get_device_uuid("/dev/sbd_device") + self.assertEqual(result, '1234-5678') + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + def test_get_device_uuid_no_uuid_found(self, mock_get_sbd_device_metadata): + mock_get_sbd_device_metadata.return_value = {} + with self.assertRaises(ValueError) as context: + SBDUtils.get_device_uuid("/dev/sbd_device") + self.assertTrue("Cannot find sbd device UUID for /dev/sbd_device" in str(context.exception)) + + @patch('crmsh.sbd.SBDUtils.get_device_uuid') + def test_compare_device_uuid_empty_node_list(self, mock_get_device_uuid): + result = SBDUtils.compare_device_uuid("/dev/sbd_device", []) + self.assertIsNone(result) + + @patch('crmsh.sbd.SBDUtils.get_device_uuid') + def 
test_compare_device_uuid_same_uuid(self, mock_get_device_uuid): + mock_get_device_uuid.return_value = '1234-5678' + SBDUtils.compare_device_uuid("/dev/sbd_device", ["node1", "node2"]) + + @patch('crmsh.sbd.SBDUtils.get_device_uuid') + def test_compare_device_uuid_different_uuid(self, mock_get_device_uuid): + mock_get_device_uuid.side_effect = lambda dev, node=None: '1234-5678' if node is None else '8765-4321' + with self.assertRaises(ValueError): + SBDUtils.compare_device_uuid("/dev/sbd_device", ["node1"]) - @classmethod - def tearDownClass(cls): - """ - Global tearDown. - """ + @patch('crmsh.utils.is_block_device') + @patch('crmsh.sbd.SBDUtils.compare_device_uuid') + def test_verify_sbd_device_exceeds_max(self, mock_compare_device_uuid, mock_is_block_device): + dev_list = [f"/dev/sbd_device_{i}" for i in range(SBDManager.SBD_DEVICE_MAX + 1)] + with self.assertRaises(ValueError): + SBDUtils.verify_sbd_device(dev_list) - def test_initialize_timeout(self): - self.sbd_timeout_inst._set_sbd_watchdog_timeout = mock.Mock() - self.sbd_timeout_inst._set_sbd_msgwait = mock.Mock() - self.sbd_timeout_inst._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice = mock.Mock() - self.sbd_timeout_inst.initialize_timeout() - self.sbd_timeout_inst._set_sbd_watchdog_timeout.assert_called_once() - self.sbd_timeout_inst._set_sbd_msgwait.assert_not_called() - self.sbd_timeout_inst._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice.assert_called_once() - - @mock.patch('logging.Logger.warning') - def test_set_sbd_watchdog_timeout(self, mock_warn): - self.sbd_timeout_inst._set_sbd_watchdog_timeout() - mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to %d for s390, it was %d", sbd.SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_S390, 5) - - @mock.patch('logging.Logger.warning') - def test_set_sbd_msgwait(self, mock_warn): - self.sbd_timeout_inst.sbd_watchdog_timeout = 15 - self.sbd_timeout_inst._set_sbd_msgwait() - mock_warn.assert_called_once_with("sbd msgwait is set to %d, it 
was %d", 30, 10) - - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.utils.get_qdevice_sync_timeout') - @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - @mock.patch('crmsh.corosync.is_qdevice_configured') - def test_adjust_sbd_watchdog_timeout_with_diskless_and_qdevice_sbd_stage(self, mock_is_configured, mock_is_active, mock_get_sync, mock_warn): - mock_is_configured.return_value = True - mock_is_active.return_value = True - mock_get_sync.return_value = 15 - self.sbd_timeout_inst.sbd_watchdog_timeout = 5 - self.sbd_timeout_inst._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() - mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to 20 for qdevice, it was 5") - - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.corosync.is_qdevice_configured') - def test_adjust_sbd_watchdog_timeout_with_diskless_and_qdevice_all(self, mock_is_configured, mock_warn): - mock_is_configured.return_value = False - self.sbd_timeout_inst.sbd_watchdog_timeout = 5 - self.sbd_timeout_inst._adjust_sbd_watchdog_timeout_with_diskless_and_qdevice() - mock_warn.assert_called_once_with("sbd_watchdog_timeout is set to 35 for qdevice, it was 5") - - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - def test_get_sbd_msgwait_exception(self, mock_run): - mock_run.return_value = "data" - with self.assertRaises(ValueError) as err: - sbd.SBDTimeout.get_sbd_msgwait("/dev/sda1") - self.assertEqual("Cannot get sbd msgwait for /dev/sda1", str(err.exception)) - mock_run.assert_called_once_with("sbd -d /dev/sda1 dump") - - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - def test_get_sbd_msgwait(self, mock_run): - mock_run.return_value = """ - Timeout (loop) : 1 - Timeout (msgwait) : 10 - ==Header on disk /dev/sda1 is dumped - """ - res = sbd.SBDTimeout.get_sbd_msgwait("/dev/sda1") - assert res == 10 - mock_run.assert_called_once_with("sbd -d /dev/sda1 dump") - - 
@mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_get_sbd_watchdog_timeout_exception(self, mock_get): - mock_get.return_value = None - with self.assertRaises(ValueError) as err: - sbd.SBDTimeout.get_sbd_watchdog_timeout() - self.assertEqual("Cannot get the value of SBD_WATCHDOG_TIMEOUT", str(err.exception)) - mock_get.assert_called_once_with("SBD_WATCHDOG_TIMEOUT") - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_get_sbd_watchdog_timeout(self, mock_get): - mock_get.return_value = 5 - res = sbd.SBDTimeout.get_sbd_watchdog_timeout() - assert res == 5 - mock_get.assert_called_once_with("SBD_WATCHDOG_TIMEOUT") - - @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - def test_get_stonith_watchdog_timeout_return(self, mock_active): - mock_active.return_value = False - res = sbd.SBDTimeout.get_stonith_watchdog_timeout() - assert res == sbd.SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT - mock_active.assert_called_once_with("pacemaker.service") - - @mock.patch('crmsh.utils.get_property') - @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - def test_get_stonith_watchdog_timeout(self, mock_active, mock_get_property): - mock_active.return_value = True - mock_get_property.return_value = "60s" - res = sbd.SBDTimeout.get_stonith_watchdog_timeout() - assert res == 60 - mock_active.assert_called_once_with("pacemaker.service") - - @mock.patch('logging.Logger.debug') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - @mock.patch('crmsh.utils.detect_virt') - @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_delay_start_expected') - @mock.patch('crmsh.utils.get_pcmk_delay_max') - @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_msgwait') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_device_from_config') - @mock.patch('crmsh.utils.is_2node_cluster_without_qdevice') - def test_load_configurations(self, mock_2node, mock_get_sbd_dev, mock_get_msgwait, mock_pcmk_delay, mock_delay_expected, mock_detect, 
mock_get_sbd_value, mock_debug): - mock_2node.return_value = True - mock_debug.return_value = False - mock_get_sbd_value.return_value = "no" - mock_get_sbd_dev.return_value = ["/dev/sda1"] - mock_get_msgwait.return_value = 30 - mock_pcmk_delay.return_value = 30 - - self.sbd_timeout_inst._load_configurations() - - mock_2node.assert_called_once_with() - mock_get_sbd_dev.assert_called_once_with() - mock_get_msgwait.assert_called_once_with("/dev/sda1") - mock_pcmk_delay.assert_called_once_with(True) - - @mock.patch('logging.Logger.debug') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - @mock.patch('crmsh.utils.detect_virt') - @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_delay_start_expected') - @mock.patch('crmsh.sbd.SBDTimeout.get_stonith_watchdog_timeout') - @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_watchdog_timeout') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_device_from_config') - @mock.patch('crmsh.utils.is_2node_cluster_without_qdevice') - def test_load_configurations_diskless(self, mock_2node, mock_get_sbd_dev, mock_get_watchdog_timeout, mock_get_stonith_watchdog_timeout, mock_delay_expected, mock_detect, mock_get_sbd_value, mock_debug): - mock_2node.return_value = True - mock_debug.return_value = False - mock_get_sbd_value.return_value = "no" - mock_get_sbd_dev.return_value = [] - mock_get_watchdog_timeout.return_value = 30 - mock_get_stonith_watchdog_timeout.return_value = 30 - - self.sbd_timeout_inst._load_configurations() - - mock_2node.assert_called_once_with() - mock_get_sbd_dev.assert_called_once_with() - mock_get_watchdog_timeout.assert_called_once_with() - mock_get_stonith_watchdog_timeout.assert_called_once_with() - - @mock.patch('crmsh.corosync.token_and_consensus_timeout') - @mock.patch('logging.Logger.debug') - def test_get_stonith_timeout_expected(self, mock_debug, mock_general): - self.sbd_timeout_inst.disk_based = True - self.sbd_timeout_inst.pcmk_delay_max = 30 - self.sbd_timeout_inst.msgwait = 30 - mock_general.return_value = 11 
- res = self.sbd_timeout_inst.get_stonith_timeout_expected() - assert res == 83 - - @mock.patch('crmsh.corosync.token_and_consensus_timeout') - @mock.patch('logging.Logger.debug') - def test_get_stonith_timeout_expected_diskless(self, mock_debug, mock_general): - self.sbd_timeout_inst.disk_based = False - self.sbd_timeout_inst.stonith_watchdog_timeout = -1 - self.sbd_timeout_inst.sbd_watchdog_timeout = 15 - mock_general.return_value = 11 - res = self.sbd_timeout_inst.get_stonith_timeout_expected() - assert res == 71 - - @mock.patch('crmsh.corosync.token_and_consensus_timeout') - def test_get_sbd_delay_start_expected(self, mock_corosync): - mock_corosync.return_value = 30 - self.sbd_timeout_inst.disk_based = True - self.sbd_timeout_inst.pcmk_delay_max = 30 - self.sbd_timeout_inst.msgwait = 30 - res = self.sbd_timeout_inst.get_sbd_delay_start_expected() - assert res == 90 - - @mock.patch('crmsh.corosync.token_and_consensus_timeout') - def test_get_sbd_delay_start_expected_diskless(self, mock_corosync): - mock_corosync.return_value = 30 - self.sbd_timeout_inst.disk_based = False - self.sbd_timeout_inst.sbd_watchdog_timeout = 30 - res = self.sbd_timeout_inst.get_sbd_delay_start_expected() - assert res == 90 - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_is_sbd_delay_start(self, mock_get_sbd_value): - mock_get_sbd_value.return_value = "100" - assert sbd.SBDTimeout.is_sbd_delay_start() is True - mock_get_sbd_value.assert_called_once_with("SBD_DELAY_START") - - @mock.patch('crmsh.sbd.SBDManager.update_configuration') - def test_adjust_sbd_delay_start_return(self, mock_update): - self.sbd_timeout_inst.sbd_delay_start_value_expected = 100 - self.sbd_timeout_inst.sbd_delay_start_value_from_config = "100" - self.sbd_timeout_inst.adjust_sbd_delay_start() - mock_update.assert_not_called() - - @mock.patch('crmsh.sbd.SBDManager.update_configuration') - def test_adjust_sbd_delay_start(self, mock_update): - 
self.sbd_timeout_inst.sbd_delay_start_value_expected = 100 - self.sbd_timeout_inst.sbd_delay_start_value_from_config = "no" - self.sbd_timeout_inst.adjust_sbd_delay_start() - mock_update.assert_called_once_with({"SBD_DELAY_START": "100"}) - - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_adjust_systemd_start_timeout_no_delay_start_no(self, mock_get_sbd_value, mock_run): - mock_get_sbd_value.return_value = "no" - self.sbd_timeout_inst.adjust_systemd_start_timeout() - mock_run.assert_not_called() - - @mock.patch('crmsh.utils.mkdirp') - @mock.patch('crmsh.utils.get_systemd_timeout_start_in_sec') - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_adjust_systemd_start_timeout_no_delay_start_return(self, mock_get_sbd_value, mock_run, mock_get_systemd_sec, mock_mkdirp): - mock_get_sbd_value.return_value = "10" - mock_run.return_value = "1min 30s" - mock_get_systemd_sec.return_value = 90 - self.sbd_timeout_inst.adjust_systemd_start_timeout() - mock_run.assert_called_once_with("systemctl show -p TimeoutStartUSec sbd --value") - mock_get_systemd_sec.assert_called_once_with("1min 30s") - mock_mkdirp.assert_not_called() - - @mock.patch('crmsh.utils.cluster_run_cmd') - @mock.patch('crmsh.bootstrap.sync_file') - @mock.patch('crmsh.utils.str2file') - @mock.patch('crmsh.utils.mkdirp') - @mock.patch('crmsh.utils.get_systemd_timeout_start_in_sec') - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_adjust_systemd_start_timeout_no_delay_start(self, mock_get_sbd_value, mock_run, mock_get_systemd_sec, mock_mkdirp, mock_str2file, mock_csync2, mock_cluster_run): - mock_get_sbd_value.return_value = "100" - mock_run.return_value = "1min 30s" - mock_get_systemd_sec.return_value = 90 - 
self.sbd_timeout_inst.adjust_systemd_start_timeout() - mock_run.assert_called_once_with("systemctl show -p TimeoutStartUSec sbd --value") - mock_get_systemd_sec.assert_called_once_with("1min 30s") - mock_mkdirp.assert_called_once_with(bootstrap.SBD_SYSTEMD_DELAY_START_DIR) - mock_str2file.assert_called_once_with('[Service]\nTimeoutSec=120', '/etc/systemd/system/sbd.service.d/sbd_delay_start.conf') - mock_csync2.assert_called_once_with(bootstrap.SBD_SYSTEMD_DELAY_START_DIR) - mock_cluster_run.assert_called_once_with("systemctl daemon-reload") - - @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_watchdog_timeout') - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_get_sbd_delay_start_sec_from_sysconfig_yes(self, mock_get_sbd_value, mock_get_sbd_timeout): - mock_get_sbd_value.return_value = "yes" - mock_get_sbd_timeout.return_value = 30 - assert sbd.SBDTimeout.get_sbd_delay_start_sec_from_sysconfig() == 60 - mock_get_sbd_value.assert_called_once_with("SBD_DELAY_START") - - @mock.patch('crmsh.sbd.SBDManager.get_sbd_value_from_config') - def test_get_sbd_delay_start_sec_from_sysconfig(self, mock_get_sbd_value): - mock_get_sbd_value.return_value = "30" - assert sbd.SBDTimeout.get_sbd_delay_start_sec_from_sysconfig() == 30 - mock_get_sbd_value.assert_called_once_with("SBD_DELAY_START") + @patch('crmsh.utils.is_block_device') + @patch('crmsh.sbd.SBDUtils.compare_device_uuid') + def test_verify_sbd_device_non_block(self, mock_compare_device_uuid, mock_is_block_device): + mock_is_block_device.return_value = False + with self.assertRaises(ValueError): + SBDUtils.verify_sbd_device(["/dev/not_a_block_device"]) + + @patch('crmsh.utils.is_block_device') + @patch('crmsh.sbd.SBDUtils.compare_device_uuid') + def test_verify_sbd_device_valid(self, mock_compare_device_uuid, mock_is_block_device): + mock_is_block_device.return_value = True + SBDUtils.verify_sbd_device(["/dev/sbd_device"], ["node1", "node2"]) + + @patch('crmsh.utils.parse_sysconfig') + def 
test_get_sbd_value_from_config(self, mock_parse_sysconfig): + mock_parse_sysconfig.return_value = {'SBD_DEVICE': '/dev/sbd_device'} + result = SBDUtils.get_sbd_value_from_config("SBD_DEVICE") + self.assertEqual(result, '/dev/sbd_device') + + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_get_sbd_device_from_config(self, mock_get_sbd_value_from_config): + mock_get_sbd_value_from_config.return_value = '/dev/sbd_device;/dev/another_sbd_device' + result = SBDUtils.get_sbd_device_from_config() + self.assertEqual(result, ['/dev/sbd_device', '/dev/another_sbd_device']) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @patch('crmsh.service_manager.ServiceManager.service_is_active') + def test_is_using_diskless_sbd(self, mock_service_is_active, mock_get_sbd_device_from_config): + mock_get_sbd_device_from_config.return_value = [] + mock_service_is_active.return_value = True + result = SBDUtils.is_using_diskless_sbd() + self.assertTrue(result) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @patch('crmsh.service_manager.ServiceManager.service_is_active') + def test_is_using_disk_based_sbd(self, mock_service_is_active, mock_get_sbd_device_from_config): + mock_get_sbd_device_from_config.return_value = ['/dev/sbd_device'] + mock_service_is_active.return_value = True + result = SBDUtils.is_using_disk_based_sbd() + self.assertTrue(result) + + @patch('crmsh.sbd.ShellUtils.get_stdout_stderr') + def test_has_sbd_device_already_initialized(self, mock_get_stdout_stderr): + mock_get_stdout_stderr.return_value = (0, '', '') + result = SBDUtils.has_sbd_device_already_initialized('/dev/sbd_device') + self.assertTrue(result) + + @patch('crmsh.bootstrap.confirm') + @patch('crmsh.sbd.SBDUtils.has_sbd_device_already_initialized') + def test_no_overwrite_device_check(self, mock_has_sbd_device_already_initialized, mock_confirm): + mock_has_sbd_device_already_initialized.return_value = True + mock_confirm.return_value = False + result = 
SBDUtils.no_overwrite_device_check('/dev/sbd_device') + self.assertTrue(result) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + def test_check_devices_metadata_consistent_single_device(self, mock_get_sbd_device_metadata): + dev_list = ['/dev/sbd_device'] + result = SBDUtils.check_devices_metadata_consistent(dev_list) + self.assertTrue(result) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + def test_check_devices_metadata_consistent_multiple_devices_consistent(self, mock_get_sbd_device_metadata): + dev_list = ['/dev/sbd_device1', '/dev/sbd_device2'] + mock_get_sbd_device_metadata.side_effect = ['metadata1', 'metadata1'] + result = SBDUtils.check_devices_metadata_consistent(dev_list) + self.assertTrue(result) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + @patch('logging.Logger.warning') + def test_check_devices_metadata_consistent_multiple_devices_inconsistent(self, mock_logger_warning, mock_get_sbd_device_metadata): + dev_list = ['/dev/sbd_device1', '/dev/sbd_device2'] + mock_get_sbd_device_metadata.side_effect = ['metadata1', 'metadata2'] + result = SBDUtils.check_devices_metadata_consistent(dev_list) + self.assertFalse(result) + mock_logger_warning.assert_called() + + @patch('crmsh.sbd.SBDUtils.check_devices_metadata_consistent') + @patch('crmsh.sbd.SBDUtils.no_overwrite_device_check') + def test_handle_input_sbd_devices_exception(self, mock_no_overwrite_device_check, mock_check_devices_metadata_consistent): + mock_no_overwrite_device_check.return_value = True + mock_check_devices_metadata_consistent.return_value = False + with self.assertRaises(utils.TerminateSubCommand): + SBDUtils.handle_input_sbd_devices(['/dev/sbd1'], dev_list_from_config=['/dev/sbd2']) + mock_no_overwrite_device_check.assert_called_once_with('/dev/sbd1') + mock_check_devices_metadata_consistent.assert_called_once_with(['/dev/sbd2', '/dev/sbd1']) + + @patch('crmsh.sbd.SBDUtils.check_devices_metadata_consistent') + 
@patch('crmsh.sbd.SBDUtils.no_overwrite_device_check') + def test_handle_input_sbd_devices(self, mock_no_overwrite_device_check, mock_check_devices_metadata_consistent): + mock_no_overwrite_device_check.return_value = False + _list1, _list2 = SBDUtils.handle_input_sbd_devices(['/dev/sbd1']) + self.assertEqual(_list1, ['/dev/sbd1']) + self.assertEqual(_list2, []) + mock_no_overwrite_device_check.assert_called_once_with('/dev/sbd1') + mock_check_devices_metadata_consistent.assert_not_called() -class TestSBDManager(unittest.TestCase): +class TestSBDTimeout(unittest.TestCase): """ - Unitary tests for crmsh.sbd.SBDManager + Unitary tests for crmsh.sbd.SBDTimeout """ + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + def test_get_sbd_msgwait_exception(self, mock_get_sbd_device_metadata): + mock_get_sbd_device_metadata.return_value = {} + with self.assertRaises(ValueError) as context: + sbd.SBDTimeout.get_sbd_msgwait("/dev/sbd_device") + self.assertTrue("Cannot get sbd msgwait for /dev/sbd_device" in str(context.exception)) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + def test_get_sbd_msgwait(self, mock_get_sbd_device_metadata): + mock_get_sbd_device_metadata.return_value = {'msgwait': 10} + result = sbd.SBDTimeout.get_sbd_msgwait("/dev/sbd_device") + self.assertEqual(result, 10) + + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_get_sbd_watchdog_timeout_exception(self, mock_get_sbd_value_from_config): + mock_get_sbd_value_from_config.return_value = None + with self.assertRaises(ValueError) as context: + sbd.SBDTimeout.get_sbd_watchdog_timeout() + self.assertTrue("Cannot get the value of SBD_WATCHDOG_TIMEOUT" in str(context.exception)) + + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_get_sbd_watchdog_timeout(self, mock_get_sbd_value_from_config): + mock_get_sbd_value_from_config.return_value = 5 + result = sbd.SBDTimeout.get_sbd_watchdog_timeout() + self.assertEqual(result, 5) + + 
@patch('crmsh.sbd.ServiceManager') + def test_get_stonith_watchdog_timeout_default(self, mock_ServiceManager): + mock_ServiceManager.return_value.service_is_active = MagicMock(return_value=False) + result = sbd.SBDTimeout.get_stonith_watchdog_timeout() + self.assertEqual(result, sbd.SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT) + + @patch('crmsh.utils.get_property') + @patch('crmsh.sbd.ServiceManager') + def test_get_stonith_watchdog_timeout(self, mock_ServiceManager, mock_get_property): + mock_ServiceManager.return_value.service_is_active = MagicMock(return_value=True) + mock_get_property.return_value = "5" + result = sbd.SBDTimeout.get_stonith_watchdog_timeout() + self.assertEqual(result, 5) + + @patch('crmsh.sbd.SBDTimeout.get_sbd_watchdog_timeout') + @patch('crmsh.utils.is_boolean_true') + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_get_sbd_delay_start_sec_from_sysconfig_yes(self, mock_get_sbd_value_from_config, mock_is_boolen_true, mock_get_sbd_watchdog_timeout): + mock_get_sbd_value_from_config.return_value = "yes" + mock_is_boolen_true.return_value = True + mock_get_sbd_watchdog_timeout.return_value = 10 + result = sbd.SBDTimeout.get_sbd_delay_start_sec_from_sysconfig() + self.assertEqual(result, 20) + + @patch('crmsh.utils.is_boolean_true') + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_get_sbd_delay_start_sec_from_sysconfig(self, mock_get_sbd_value_from_config, mock_is_boolen_true): + mock_get_sbd_value_from_config.return_value = 100 + mock_is_boolen_true.return_value = False + result = sbd.SBDTimeout.get_sbd_delay_start_sec_from_sysconfig() + self.assertEqual(result, 100) + + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_is_sbd_delay_start(self, mock_get_sbd_value_from_config): + mock_get_sbd_value_from_config.return_value = "yes" + result = sbd.SBDTimeout.is_sbd_delay_start() + self.assertTrue(result) + + @patch('crmsh.utils.get_systemd_timeout_start_in_sec') + @patch('crmsh.sh.cluster_shell') 
+ def test_get_sbd_systemd_start_timeout(self, mock_cluster_shell, mock_get_systemd_timeout_start_in_sec): + mock_cluster_shell.return_value.get_stdout_or_raise_error = MagicMock(return_value="1min 30s") + mock_get_systemd_timeout_start_in_sec.return_value = 90 + result = sbd.SBDTimeout.get_sbd_systemd_start_timeout() + self.assertEqual(result, 90) + mock_cluster_shell.return_value.get_stdout_or_raise_error.assert_called_once_with(sbd.SBDTimeout.SHOW_SBD_START_TIMEOUT_CMD) + mock_get_systemd_timeout_start_in_sec.assert_called_once_with("1min 30s") + + @patch('crmsh.sbd.SBDTimeout.adjust_systemd_start_timeout') + @patch('crmsh.sbd.SBDTimeout.adjust_stonith_timeout') + @patch('crmsh.sbd.SBDTimeout.adjust_sbd_delay_start') + @patch('crmsh.sbd.SBDTimeout._load_configurations') + def test_adjust_sbd_timeout_related_cluster_configuration(self, mock_load_configurations, mock_adjust_sbd_delay_start, mock_adjust_stonith_timeout, mock_adjust_systemd_start_timeout): + sbd.SBDTimeout.adjust_sbd_timeout_related_cluster_configuration() + mock_load_configurations.assert_called_once() + mock_adjust_sbd_delay_start.assert_called_once() + mock_adjust_stonith_timeout.assert_called_once() + mock_adjust_systemd_start_timeout.assert_called_once() + + @patch('crmsh.sbd.SBDManager.update_sbd_configuration') + def test_adjust_sbd_delay_start_return(self, mock_update_sbd_configuration): + inst = sbd.SBDTimeout() + inst.sbd_delay_start_value_expected = 100 + inst.sbd_delay_start_value_from_config = "100" + inst.adjust_sbd_delay_start() + mock_update_sbd_configuration.assert_not_called() + + @patch('crmsh.sbd.SBDManager.update_sbd_configuration') + def test_adjust_sbd_delay_start(self, mock_update_sbd_configuration): + inst = sbd.SBDTimeout() + inst.sbd_delay_start_value_expected = "no" + inst.sbd_delay_start_value_from_config = 200 + inst.adjust_sbd_delay_start() + mock_update_sbd_configuration.assert_called_once_with({'SBD_DELAY_START': 'no'}) + + @patch('crmsh.utils.set_property') + def 
test_adjust_stonith_timeout(self, mock_set_property): + inst = sbd.SBDTimeout() + inst.get_stonith_timeout_expected = MagicMock(return_value=10) + inst.adjust_stonith_timeout() + mock_set_property.assert_called_once_with("stonith-timeout", 10, conditional=True) + + @patch('crmsh.sbd.SBDTimeout.get_sbd_systemd_start_timeout') + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_adjust_systemd_start_timeout_no_delay_start(self, mock_get_sbd_value_from_config, mock_get_sbd_systemd_start_timeout): + mock_get_sbd_value_from_config.return_value = "no" + inst = sbd.SBDTimeout() + inst.adjust_systemd_start_timeout() + mock_get_sbd_value_from_config.assert_called_once_with("SBD_DELAY_START") + mock_get_sbd_systemd_start_timeout.assert_not_called() + + @patch('crmsh.sbd.SBDTimeout.get_sbd_systemd_start_timeout') + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_adjust_systemd_start_timeout_return(self, mock_get_sbd_value_from_config, mock_get_sbd_systemd_start_timeout): + mock_get_sbd_value_from_config.return_value = "10" + mock_get_sbd_systemd_start_timeout.return_value = 90 + inst = sbd.SBDTimeout() + inst.adjust_systemd_start_timeout() + mock_get_sbd_value_from_config.assert_called_once_with("SBD_DELAY_START") + mock_get_sbd_systemd_start_timeout.assert_called_once() + + @patch('crmsh.utils.cluster_run_cmd') + @patch('crmsh.bootstrap.sync_file') + @patch('crmsh.utils.str2file') + @patch('crmsh.utils.mkdirp') + @patch('crmsh.sbd.SBDTimeout.get_sbd_systemd_start_timeout') + @patch('crmsh.sbd.SBDUtils.get_sbd_value_from_config') + def test_adjust_systemd_start_timeout(self, mock_get_sbd_value_from_config, mock_get_sbd_systemd_start_timeout, mock_mkdirp, mock_str2file, mock_sync_file, mock_cluster_run_cmd): + mock_get_sbd_value_from_config.return_value = "150" + mock_get_sbd_systemd_start_timeout.return_value = 90 + inst = sbd.SBDTimeout() + inst.adjust_systemd_start_timeout() + 
mock_get_sbd_value_from_config.assert_called_once_with("SBD_DELAY_START") + mock_get_sbd_systemd_start_timeout.assert_called_once() + mock_mkdirp.assert_called_once_with(sbd.SBDManager.SBD_SYSTEMD_DELAY_START_DIR) + mock_cluster_run_cmd.assert_called_once_with("systemctl daemon-reload") + + @patch('crmsh.corosync.token_and_consensus_timeout') + def test_get_sbd_delay_start_expected_diskbased(self, mock_token_and_consensus_timeout): + inst = sbd.SBDTimeout() + inst.disk_based = True + inst.pcmk_delay_max = 10 + inst.msgwait = 5 + mock_token_and_consensus_timeout.return_value = 10 + self.assertEqual(inst.get_sbd_delay_start_expected(), 25) + + @patch('crmsh.corosync.token_and_consensus_timeout') + def test_get_sbd_delay_start_expected_diskless(self, mock_token_and_consensus_timeout): + inst = sbd.SBDTimeout() + inst.disk_based = False + inst.sbd_watchdog_timeout = 5 + mock_token_and_consensus_timeout.return_value = 10 + self.assertEqual(inst.get_sbd_delay_start_expected(), 20) + + @patch('crmsh.sbd.SBDTimeout.get_stonith_timeout_expected') + @patch('crmsh.sbd.SBDTimeout._load_configurations') + def test_get_stonith_timeout(self, mock_load_configurations, mock_get_sbd_delay_start_expected): + mock_get_sbd_delay_start_expected.return_value = 10 + res = sbd.SBDTimeout.get_stonith_timeout() + self.assertEqual(res, 10) + mock_load_configurations.assert_called_once() + mock_get_sbd_delay_start_expected.assert_called_once() + + @patch('logging.Logger.debug') + @patch('crmsh.corosync.token_and_consensus_timeout') + def test_get_stonith_timeout_expected_diskbased(self, mock_token_and_consensus_timeout, mock_logger_debug): + inst = sbd.SBDTimeout() + inst.disk_based = True + inst.pcmk_delay_max = 10 + inst.msgwait = 5 + mock_token_and_consensus_timeout.return_value = 10 + result = inst.get_stonith_timeout_expected() + self.assertEqual(result, 70) + + @patch('logging.Logger.debug') + @patch('crmsh.corosync.token_and_consensus_timeout') + def 
test_get_stonith_timeout_expected_diskless(self, mock_token_and_consensus_timeout, mock_logger_debug): + inst = sbd.SBDTimeout() + inst.disk_based = False + inst.sbd_watchdog_timeout = 5 + inst.stonith_watchdog_timeout = 20 + mock_token_and_consensus_timeout.return_value = 20 + result = inst.get_stonith_timeout_expected() + self.assertEqual(result, 80) - @classmethod - def setUpClass(cls): - """ - Global setUp. - """ - - def setUp(self): - """ - Test setUp. - """ - self.sbd_inst = sbd.SBDManager(mock.Mock(sbd_devices=["/dev/sdb1", "/dev/sdc1"], diskless_sbd=False)) - self.sbd_inst_devices_gt_3 = sbd.SBDManager(mock.Mock(sbd_devices=["/dev/sdb1", "/dev/sdc1", "/dev/sdd1", "/dev/sde1"])) - self.sbd_inst_interactive = sbd.SBDManager(mock.Mock(sbd_devices=[], diskless_sbd=False)) - self.sbd_inst_diskless = sbd.SBDManager(mock.Mock(sbd_devices=[], diskless_sbd=True)) - - def tearDown(self): - """ - Test tearDown. - """ - @classmethod - def tearDownClass(cls): - """ - Global tearDown. - """ +class TestSBDManager(unittest.TestCase): - @mock.patch('logging.Logger.warning') - def test_get_sbd_device_interactive_yes_to_all(self, mock_warn): - self.sbd_inst._context = mock.Mock(yes_to_all=True) - self.sbd_inst._get_sbd_device_interactive() - mock_warn.assert_called_once_with(sbd.SBDManager.SBD_WARNING) - - @mock.patch('crmsh.bootstrap.confirm') - @mock.patch('logging.Logger.info') - @mock.patch('logging.Logger.warning') - def test_get_sbd_device_interactive_not_confirm(self, mock_warn, mock_status, mock_confirm): - self.sbd_inst._context.yes_to_all = False + def test_convert_timeout_dict_to_opt_str(self): + timeout_dict = {'watchdog': 5, 'msgwait': 10} + result = SBDManager.convert_timeout_dict_to_opt_str(timeout_dict) + self.assertEqual(result, '-1 5 -4 10') + + @patch('logging.Logger.info') + @patch('crmsh.sbd.ServiceManager') + @patch('crmsh.utils.list_cluster_nodes') + def test_enable_sbd_service(self, mock_list_cluster_nodes, mock_ServiceManager, mock_logger_info): + 
mock_list_cluster_nodes.return_value = ['node1', 'node2'] + mock_ServiceManager.return_value.service_is_enabled.side_effect = [False, False] + SBDManager.enable_sbd_service() + mock_logger_info.assert_has_calls([ + call("Enable %s on node %s", constants.SBD_SERVICE, 'node1'), + call("Enable %s on node %s", constants.SBD_SERVICE, 'node2') + ]) + + @patch('crmsh.xmlutil.CrmMonXmlParser') + @patch('crmsh.sbd.ServiceManager') + def test_restart_cluster_if_possible_return(self, mock_ServiceManager, mock_CrmMonXmlParser): + mock_ServiceManager.return_value.service_is_active.return_value = False + SBDManager.restart_cluster_if_possible() + mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE) + mock_CrmMonXmlParser.assert_not_called() + + @patch('logging.Logger.warning') + @patch('crmsh.xmlutil.CrmMonXmlParser') + @patch('crmsh.sbd.ServiceManager') + def test_restart_cluster_if_possible_manually(self, mock_ServiceManager, mock_CrmMonXmlParser, mock_logger_warning): + mock_ServiceManager.return_value.service_is_active.return_value = True + mock_CrmMonXmlParser.return_value.is_any_resource_running.return_value = True + SBDManager.restart_cluster_if_possible() + mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE) + mock_logger_warning.assert_called_once_with("Resource is running, need to restart cluster service manually on each node") + + @patch('crmsh.bootstrap.restart_cluster') + @patch('logging.Logger.warning') + @patch('crmsh.xmlutil.CrmMonXmlParser') + @patch('crmsh.sbd.ServiceManager') + def test_restart_cluster_if_possible(self, mock_ServiceManager, mock_CrmMonXmlParser, mock_logger_warning, mock_restart_cluster): + mock_ServiceManager.return_value.service_is_active.return_value = True + mock_CrmMonXmlParser.return_value.is_any_resource_running.return_value = False + SBDManager.restart_cluster_if_possible() + 
mock_ServiceManager.return_value.service_is_active.assert_called_once_with(constants.PCMK_SERVICE) + mock_restart_cluster.assert_called_once() + + @patch('crmsh.bootstrap.prompt_for_string') + def test_prompt_for_sbd_device_diskless(self, mock_prompt_for_string): + mock_prompt_for_string.return_value = "none" + sbdmanager_instance = SBDManager() + result = sbdmanager_instance._prompt_for_sbd_device() + self.assertEqual(result, []) + + @patch('crmsh.bootstrap.confirm') + @patch('logging.Logger.warning') + @patch('crmsh.sbd.SBDUtils.has_sbd_device_already_initialized') + @patch('logging.Logger.error') + @patch('crmsh.sbd.SBDUtils.verify_sbd_device') + @patch('crmsh.utils.re_split_string') + @patch('crmsh.bootstrap.prompt_for_string') + def test_prompt_for_sbd_device(self, mock_prompt_for_string, mock_re_split_string, mock_verify_sbd_device, mock_logger_error, mock_has_sbd_device_already_initialized, mock_logger_warning, mock_confirm): + mock_prompt_for_string.side_effect = ["/dev/dev1", "/dev/dev2", "/dev/dev3;/dev/dev4"] + mock_re_split_string.side_effect = [["/dev/dev1"], ["/dev/dev2"], ["/dev/dev3", "/dev/dev4"]] + mock_verify_sbd_device.side_effect = [ValueError("Not a block device"), None, None] + mock_has_sbd_device_already_initialized.side_effect = [False, True, False] + mock_confirm.side_effect = [False, True] + sbdmanager_instance = SBDManager() + result = sbdmanager_instance._prompt_for_sbd_device() + self.assertEqual(result, ["/dev/dev3", "/dev/dev4"]) + + @patch('crmsh.sbd.ServiceManager') + def test_get_sbd_device_interactive_yes_to_all(self, mock_ServiceManager): + mock_bootstrap_ctx = Mock(yes_to_all=True) + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + sbdmanager_instance._warn_and_raise_no_sbd = Mock() + sbdmanager_instance._warn_and_raise_no_sbd.side_effect = SBDManager.NotConfigSBD + with self.assertRaises(SBDManager.NotConfigSBD): + sbdmanager_instance.get_sbd_device_interactive() + 
sbdmanager_instance._warn_and_raise_no_sbd.assert_called_once() + + @patch('crmsh.bootstrap.confirm') + @patch('logging.Logger.info') + @patch('crmsh.sbd.ServiceManager') + def test_get_sbd_device_interactive_not_wish(self, mock_ServiceManager, mock_logger_info, mock_confirm): + mock_bootstrap_ctx = Mock(yes_to_all=False) mock_confirm.return_value = False - self.sbd_inst._get_sbd_device_interactive() - mock_status.assert_called_once_with(sbd.SBDManager.SBD_STATUS_DESCRIPTION) - mock_warn.assert_called_once_with("Not configuring SBD - STONITH will be disabled.") - - @mock.patch('crmsh.sbd.SBDManager._no_overwrite_check') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.bootstrap.confirm') - @mock.patch('logging.Logger.info') - def test_get_sbd_device_interactive_already_configured(self, mock_status, mock_confirm, mock_from_config, mock_no_overwrite): - self.sbd_inst._context = mock.Mock(yes_to_all=False) + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + sbdmanager_instance._warn_and_raise_no_sbd = Mock() + sbdmanager_instance._warn_and_raise_no_sbd.side_effect = SBDManager.NotConfigSBD + with self.assertRaises(SBDManager.NotConfigSBD): + sbdmanager_instance.get_sbd_device_interactive() + mock_logger_info.assert_called_once_with(SBDManager.SBD_STATUS_DESCRIPTION) + sbdmanager_instance._warn_and_raise_no_sbd.assert_called_once() + + @patch('crmsh.utils.fatal') + @patch('crmsh.utils.package_is_installed') + @patch('crmsh.bootstrap.confirm') + @patch('logging.Logger.info') + @patch('crmsh.sbd.ServiceManager') + def test_get_sbd_device_interactive_not_installed(self, mock_ServiceManager, mock_logger_info, mock_confirm, mock_package_is_installed, mock_fatal): + mock_bootstrap_ctx = Mock(yes_to_all=False) mock_confirm.return_value = True - mock_from_config.return_value = ["/dev/sda1"] - mock_no_overwrite.return_value = True - - res = self.sbd_inst._get_sbd_device_interactive() - self.assertEqual(res, 
["/dev/sda1"]) - - mock_status.assert_called_once_with(sbd.SBDManager.SBD_STATUS_DESCRIPTION) - mock_confirm.assert_has_calls([ - mock.call("Do you wish to use SBD?"), - ]) - mock_status.assert_called_once_with(sbd.SBDManager.SBD_STATUS_DESCRIPTION) - mock_from_config.assert_called_once_with() - - @mock.patch('crmsh.bootstrap.prompt_for_string') - @mock.patch('crmsh.sbd.SBDManager._no_overwrite_check') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.bootstrap.confirm') - @mock.patch('logging.Logger.info') - def test_get_sbd_device_interactive_diskless(self, mock_status, mock_confirm, mock_from_config, mock_no_overwrite, mock_prompt): - self.sbd_inst._context = mock.Mock(yes_to_all=False) + mock_package_is_installed.return_value = False + mock_fatal.side_effect = ValueError("SBD is not installed") + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + with self.assertRaises(ValueError): + sbdmanager_instance.get_sbd_device_interactive() + mock_logger_info.assert_called_once_with(SBDManager.SBD_STATUS_DESCRIPTION) + mock_fatal.assert_called_once_with(SBDManager.SBD_NOT_INSTALLED_MSG) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @patch('crmsh.utils.package_is_installed') + @patch('crmsh.bootstrap.confirm') + @patch('logging.Logger.info') + @patch('crmsh.sbd.ServiceManager') + def test_get_sbd_device_interactive_not_overwrite(self, mock_ServiceManager, mock_logger_info, mock_confirm, mock_package_is_installed, mock_get_sbd_device_from_config): + mock_bootstrap_ctx = Mock(yes_to_all=False) mock_confirm.return_value = True - mock_no_overwrite.return_value = False - mock_from_config.return_value = [] - mock_prompt.return_value = "none" - - self.sbd_inst._get_sbd_device_interactive() - - mock_status.assert_called_once_with(sbd.SBDManager.SBD_STATUS_DESCRIPTION) - mock_from_config.assert_called_once_with() - mock_prompt.assert_called_once_with('Path to storage device (e.g. 
/dev/disk/by-id/...), or "none" for diskless sbd, use ";" as separator for multi path', 'none|\\/.*') - - @mock.patch('crmsh.bootstrap.prompt_for_string') - @mock.patch('crmsh.sbd.SBDManager._no_overwrite_check') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.bootstrap.confirm') - @mock.patch('logging.Logger.info') - def test_get_sbd_device_interactive_null_and_diskless(self, mock_status, mock_confirm, mock_from_config, mock_no_overwrite, mock_prompt): - self.sbd_inst._context = mock.Mock(yes_to_all=False) + mock_package_is_installed.return_value = True + mock_get_sbd_device_from_config.return_value = ['/dev/sbd_device'] + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + sbdmanager_instance._wants_to_overwrite = Mock() + sbdmanager_instance._wants_to_overwrite.return_value = False + self.assertEqual(sbdmanager_instance.get_sbd_device_interactive(), []) + mock_logger_info.assert_called_once_with(SBDManager.SBD_STATUS_DESCRIPTION) + sbdmanager_instance._wants_to_overwrite.assert_called_once_with(['/dev/sbd_device']) + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @patch('crmsh.utils.package_is_installed') + @patch('crmsh.bootstrap.confirm') + @patch('logging.Logger.info') + @patch('crmsh.sbd.ServiceManager') + def test_get_sbd_device_interactive(self, mock_ServiceManager, mock_logger_info, mock_confirm, mock_package_is_installed, mock_get_sbd_device_from_config): + mock_bootstrap_ctx = Mock(yes_to_all=False) mock_confirm.return_value = True - mock_no_overwrite.return_value = False - mock_from_config.return_value = [] - mock_prompt.return_value = "none" - - self.sbd_inst._get_sbd_device_interactive() - - mock_status.assert_called_once_with(sbd.SBDManager.SBD_STATUS_DESCRIPTION) - mock_confirm.assert_called_once_with("Do you wish to use SBD?") - mock_from_config.assert_called_once_with() - mock_prompt.assert_has_calls([ - mock.call('Path to storage device (e.g. 
/dev/disk/by-id/...), or "none" for diskless sbd, use ";" as separator for multi path', 'none|\\/.*') - ]) - - @mock.patch('crmsh.utils.re_split_string') - @mock.patch('logging.Logger.warning') - @mock.patch('logging.Logger.error') - @mock.patch('crmsh.sbd.SBDManager._verify_sbd_device') - @mock.patch('crmsh.bootstrap.prompt_for_string') - @mock.patch('crmsh.sbd.SBDManager._no_overwrite_check') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.bootstrap.confirm') - @mock.patch('logging.Logger.info') - def test_get_sbd_device_interactive(self, mock_status, mock_confirm, mock_from_config, mock_no_overwrite, mock_prompt, mock_verify, mock_error_msg, mock_warn, mock_split): - self.sbd_inst._context = mock.Mock(yes_to_all=False) - mock_confirm.side_effect = [True, False, True] - mock_from_config.return_value = [] - mock_no_overwrite.return_value = False - mock_prompt.side_effect = ["/dev/test1", "/dev/sda1", "/dev/sdb1"] - mock_split.side_effect = [["/dev/test1"], ["/dev/sda1"], ["/dev/sdb1"]] - mock_verify.side_effect = [ValueError("/dev/test1 error"), None, None] - - res = self.sbd_inst._get_sbd_device_interactive() - self.assertEqual(res, ["/dev/sdb1"]) - - mock_status.assert_called_once_with(sbd.SBDManager.SBD_STATUS_DESCRIPTION) - mock_confirm.assert_has_calls([ - mock.call("Do you wish to use SBD?"), - mock.call("Are you sure you wish to use this device?") - ]) - mock_from_config.assert_called_once_with() - mock_error_msg.assert_called_once_with("/dev/test1 error") - mock_warn.assert_has_calls([ - mock.call("All data on /dev/sda1 will be destroyed!"), - mock.call("All data on /dev/sdb1 will be destroyed!") - ]) - mock_prompt.assert_has_calls([ - mock.call('Path to storage device (e.g. 
/dev/disk/by-id/...), or "none" for diskless sbd, use ";" as separator for multi path', 'none|\\/.*') for x in range(3) - ]) - mock_split.assert_has_calls([ - mock.call(sbd.SBDManager.PARSE_RE, "/dev/test1"), - mock.call(sbd.SBDManager.PARSE_RE, "/dev/sda1"), - mock.call(sbd.SBDManager.PARSE_RE, "/dev/sdb1"), - ]) - - def test_verify_sbd_device_gt_3(self): - assert self.sbd_inst_devices_gt_3.sbd_devices_input == ["/dev/sdb1", "/dev/sdc1", "/dev/sdd1", "/dev/sde1"] - dev_list = self.sbd_inst_devices_gt_3.sbd_devices_input - with self.assertRaises(ValueError) as err: - self.sbd_inst_devices_gt_3._verify_sbd_device(dev_list) - self.assertEqual("Maximum number of SBD device is 3", str(err.exception)) - - @mock.patch('crmsh.sbd.SBDManager._compare_device_uuid') - @mock.patch('crmsh.utils.is_block_device') - def test_verify_sbd_device_not_block(self, mock_block_device, mock_compare): - assert self.sbd_inst.sbd_devices_input == ["/dev/sdb1", "/dev/sdc1"] - dev_list = self.sbd_inst.sbd_devices_input - mock_block_device.side_effect = [True, False] - - with self.assertRaises(ValueError) as err: - self.sbd_inst._verify_sbd_device(dev_list) - self.assertEqual("/dev/sdc1 doesn't look like a block device", str(err.exception)) - - mock_block_device.assert_has_calls([mock.call("/dev/sdb1"), mock.call("/dev/sdc1")]) - mock_compare.assert_called_once_with("/dev/sdb1", []) - - @mock.patch('crmsh.sbd.SBDManager._verify_sbd_device') - def test_get_sbd_device_from_option(self, mock_verify): - self.sbd_inst._get_sbd_device() - mock_verify.assert_called_once_with(['/dev/sdb1', '/dev/sdc1']) - - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_interactive') - def test_get_sbd_device_from_interactive(self, mock_interactive): - mock_interactive.return_value = ["/dev/sdb1", "/dev/sdc1"] - self.sbd_inst_interactive._get_sbd_device() - mock_interactive.assert_called_once_with() - - def test_get_sbd_device_diskless(self): - self.sbd_inst_diskless._get_sbd_device() - - 
@mock.patch('crmsh.sbd.SBDTimeout') - @mock.patch('logging.Logger.info') - def test_initialize_sbd_return(self, mock_info, mock_sbd_timeout): - mock_inst = mock.Mock() - mock_sbd_timeout.return_value = mock_inst - self.sbd_inst_diskless._context = mock.Mock(profiles_dict={}) - self.sbd_inst_diskless._initialize_sbd() - mock_info.assert_called_once_with("Configuring diskless SBD") - mock_inst.initialize_timeout.assert_called_once_with() - - @mock.patch('crmsh.utils.fatal') - @mock.patch('crmsh.bootstrap.invoke') - @mock.patch('crmsh.sbd.SBDTimeout') - @mock.patch('logging.Logger.info') - def test_initialize_sbd(self, mock_info, mock_sbd_timeout, mock_invoke, mock_error): - mock_inst = mock.Mock(sbd_msgwait=10, sbd_watchdog_timeout=5) - mock_sbd_timeout.return_value = mock_inst - mock_inst.set_sbd_watchdog_timeout = mock.Mock() - mock_inst.set_sbd_msgwait = mock.Mock() - self.sbd_inst._sbd_devices = ["/dev/sdb1", "/dev/sdc1"] - mock_invoke.side_effect = [(True, None, None), (False, None, "error")] - mock_error.side_effect = ValueError - - with self.assertRaises(ValueError): - self.sbd_inst._initialize_sbd() - - mock_invoke.assert_has_calls([ - mock.call("sbd -4 10 -1 5 -d /dev/sdb1 create"), - mock.call("sbd -4 10 -1 5 -d /dev/sdc1 create") - ]) - mock_error.assert_called_once_with("Failed to initialize SBD device /dev/sdc1: error") - - @mock.patch('crmsh.bootstrap.sync_file') - @mock.patch('crmsh.utils.sysconfig_set') - @mock.patch('shutil.copyfile') - def test_update_configuration(self, mock_copy, mock_sysconfig, mock_update): - self.sbd_inst._sbd_devices = ["/dev/sdb1", "/dev/sdc1"] - self.sbd_inst._watchdog_inst = mock.Mock(watchdog_device_name="/dev/watchdog") - self.sbd_inst.timeout_inst = mock.Mock(sbd_watchdog_timeout=15) - - self.sbd_inst._update_sbd_configuration() - - mock_copy.assert_called_once_with("/usr/share/fillup-templates/sysconfig.sbd", "/etc/sysconfig/sbd") - mock_sysconfig.assert_called_once_with("/etc/sysconfig/sbd", 
SBD_WATCHDOG_DEV='/dev/watchdog', SBD_DEVICE='/dev/sdb1;/dev/sdc1', SBD_WATCHDOG_TIMEOUT="15") - mock_update.assert_called_once_with("/etc/sysconfig/sbd") - - @mock.patch('crmsh.bootstrap.utils.parse_sysconfig') - def test_get_sbd_device_from_config_none(self, mock_parse): - mock_parse_inst = mock.Mock() - mock_parse.return_value = mock_parse_inst - mock_parse_inst.get.return_value = None - - res = self.sbd_inst._get_sbd_device_from_config() - assert res == [] - - mock_parse.assert_called_once_with("/etc/sysconfig/sbd") - mock_parse_inst.get.assert_called_once_with("SBD_DEVICE") - - @mock.patch('crmsh.utils.re_split_string') - @mock.patch('crmsh.bootstrap.utils.parse_sysconfig') - def test_get_sbd_device_from_config(self, mock_parse, mock_split): - mock_parse_inst = mock.Mock() - mock_parse.return_value = mock_parse_inst - mock_parse_inst.get.return_value = "/dev/sdb1;/dev/sdc1" - mock_split.return_value = ["/dev/sdb1", "/dev/sdc1"] - - res = self.sbd_inst._get_sbd_device_from_config() - assert res == ["/dev/sdb1", "/dev/sdc1"] - - mock_parse.assert_called_once_with("/etc/sysconfig/sbd") - mock_parse_inst.get.assert_called_once_with("SBD_DEVICE") - mock_split.assert_called_once_with(sbd.SBDManager.PARSE_RE, "/dev/sdb1;/dev/sdc1") - - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.utils.get_quorum_votes_dict') - def test_warn_diskless_sbd_diskless(self, mock_vote, mock_warn): - self.sbd_inst_diskless._context = mock.Mock(cluster_is_running=False) - self.sbd_inst_diskless._warn_diskless_sbd() - mock_vote.assert_not_called() - mock_warn.assert_called_once_with(sbd.SBDManager.DISKLESS_SBD_WARNING) - - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.utils.get_quorum_votes_dict') - def test_warn_diskless_sbd_peer(self, mock_vote, mock_warn): - mock_vote.return_value = {'Expected': '1'} - self.sbd_inst_diskless._warn_diskless_sbd("node2") - mock_vote.assert_called_once_with("node2") - 
mock_warn.assert_called_once_with(sbd.SBDManager.DISKLESS_SBD_WARNING) - - @mock.patch('crmsh.utils.package_is_installed') - def test_sbd_init_not_installed(self, mock_package): - mock_package.return_value = False - self.sbd_inst.sbd_init() - mock_package.assert_called_once_with("sbd") - - @mock.patch('crmsh.bootstrap.invoke') - @mock.patch('crmsh.sbd.SBDManager._update_sbd_configuration') - @mock.patch('crmsh.sbd.SBDManager._initialize_sbd') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device') - @mock.patch('crmsh.watchdog.Watchdog') - @mock.patch('crmsh.utils.package_is_installed') - def test_sbd_init_return(self, mock_package, mock_watchdog, mock_get_device, mock_initialize, mock_update, mock_invoke): - mock_package.return_value = True - self.sbd_inst._sbd_devices = None - self.sbd_inst.diskless_sbd = False - self.sbd_inst._context = mock.Mock(watchdog=None) - mock_watchdog_inst = mock.Mock() - mock_watchdog.return_value = mock_watchdog_inst - mock_watchdog_inst.init_watchdog = mock.Mock() - - self.sbd_inst.sbd_init() - - mock_package.assert_called_once_with("sbd") - mock_get_device.assert_called_once_with() - mock_initialize.assert_not_called() - mock_update.assert_not_called() - mock_watchdog.assert_called_once_with(_input=None) - mock_watchdog_inst.init_watchdog.assert_called_once_with() - mock_invoke.assert_called_once_with("systemctl disable sbd.service") - - @mock.patch('crmsh.sbd.SBDManager._enable_sbd_service') - @mock.patch('crmsh.sbd.SBDManager._warn_diskless_sbd') - @mock.patch('crmsh.sbd.SBDManager._update_sbd_configuration') - @mock.patch('crmsh.sbd.SBDManager._initialize_sbd') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device') - @mock.patch('crmsh.watchdog.Watchdog') - @mock.patch('crmsh.utils.package_is_installed') - def test_sbd_init(self, mock_package, mock_watchdog, mock_get_device, mock_initialize, mock_update, mock_warn, mock_enable_sbd): - mock_package.return_value = True - self.sbd_inst_diskless._context = mock.Mock(watchdog=None) - 
mock_watchdog_inst = mock.Mock() - mock_watchdog.return_value = mock_watchdog_inst - mock_watchdog_inst.init_watchdog = mock.Mock() - self.sbd_inst_diskless.sbd_init() - - mock_package.assert_called_once_with("sbd") - mock_get_device.assert_called_once_with() - mock_initialize.assert_called_once_with() - mock_update.assert_called_once_with() - mock_watchdog.assert_called_once_with(_input=None) - mock_watchdog_inst.init_watchdog.assert_called_once_with() - mock_warn.assert_called_once_with() - mock_enable_sbd.assert_called_once_with() - - @mock.patch('crmsh.sbd.SBDManager.configure_sbd_resource_and_properties') - @mock.patch('crmsh.bootstrap.wait_for_cluster') - @mock.patch('crmsh.utils.cluster_run_cmd') - @mock.patch('logging.Logger.info') - @mock.patch('crmsh.xmlutil.CrmMonXmlParser') - def test_restart_cluster_on_needed_no_ra_running(self, mock_parser, mock_status, mock_cluster_run, mock_wait, mock_config_sbd_ra): - mock_parser().is_any_resource_running.return_value = False - self.sbd_inst._restart_cluster_and_configure_sbd_ra() - mock_status.assert_called_once_with("Restarting cluster service") - mock_cluster_run.assert_called_once_with("crm cluster restart") - mock_wait.assert_called_once_with() - mock_config_sbd_ra.assert_called_once_with() - - @mock.patch('crmsh.sbd.SBDTimeout.get_stonith_timeout') - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.xmlutil.CrmMonXmlParser') - def test_restart_cluster_on_needed_diskless(self, mock_parser, mock_warn, mock_get_timeout): - mock_parser().is_any_resource_running.return_value = True - mock_get_timeout.return_value = 60 - self.sbd_inst_diskless.timeout_inst = mock.Mock(stonith_watchdog_timeout=-1) - self.sbd_inst_diskless._restart_cluster_and_configure_sbd_ra() - mock_warn.assert_has_calls([ - mock.call("To start sbd.service, need to restart cluster service manually on each node"), - mock.call("Then run \"crm configure property stonith-enabled=true stonith-watchdog-timeout=-1 stonith-timeout=60\" on any 
node") - ]) - - @mock.patch('crmsh.sbd.SBDManager.configure_sbd_resource_and_properties') - @mock.patch('logging.Logger.warning') - @mock.patch('crmsh.xmlutil.CrmMonXmlParser') - def test_restart_cluster_on_needed(self, mock_parser, mock_warn, mock_config_sbd_ra): - mock_parser().is_any_resource_running.return_value = True - self.sbd_inst._restart_cluster_and_configure_sbd_ra() - mock_warn.assert_has_calls([ - mock.call("To start sbd.service, need to restart cluster service manually on each node"), - ]) - - @mock.patch('crmsh.bootstrap.invoke') - def test_enable_sbd_service_init(self, mock_invoke): - self.sbd_inst._context = mock.Mock(cluster_is_running=False) - self.sbd_inst._enable_sbd_service() - mock_invoke.assert_called_once_with("systemctl enable sbd.service") - - @mock.patch('crmsh.sbd.SBDManager._restart_cluster_and_configure_sbd_ra') - @mock.patch('crmsh.utils.cluster_run_cmd') - def test_enable_sbd_service_restart(self, mock_cluster_run, mock_restart): - self.sbd_inst._context = mock.Mock(cluster_is_running=True) - self.sbd_inst._enable_sbd_service() - mock_cluster_run.assert_has_calls([ - mock.call("systemctl enable sbd.service"), - ]) - mock_restart.assert_called_once_with() - - @mock.patch('crmsh.utils.package_is_installed') - def test_configure_sbd_resource_and_properties_not_installed(self, mock_package): - mock_package.return_value = False - self.sbd_inst.configure_sbd_resource_and_properties() - mock_package.assert_called_once_with("sbd") - - @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - @mock.patch('crmsh.sbd.SBDTimeout.adjust_sbd_timeout_related_cluster_configuration') - @mock.patch('crmsh.utils.set_property') - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - @mock.patch('crmsh.xmlutil.CrmMonXmlParser') - @mock.patch('crmsh.service_manager.ServiceManager.service_is_enabled') - @mock.patch('crmsh.utils.package_is_installed') - def test_configure_sbd_resource_and_properties( - self, - mock_package, 
mock_enabled, mock_parser, mock_run, mock_set_property, sbd_adjust, mock_is_active, - ): - mock_package.return_value = True - mock_enabled.return_value = True - mock_parser().is_resource_configured.return_value = False - mock_is_active.return_value = False - self.sbd_inst._context = mock.Mock(cluster_is_running=True) - self.sbd_inst._get_sbd_device_from_config = mock.Mock() - self.sbd_inst._get_sbd_device_from_config.return_value = ["/dev/sda1"] - self.sbd_inst._sbd_devices = ["/dev/sda1"] - - self.sbd_inst.configure_sbd_resource_and_properties() - - mock_package.assert_called_once_with("sbd") - mock_enabled.assert_called_once_with("sbd.service") - mock_run.assert_called_once_with("crm configure primitive {} {} params devices=\"/dev/sda1\"".format(sbd.SBDManager.SBD_RA_ID, sbd.SBDManager.SBD_RA)) - mock_set_property.assert_called_once_with("stonith-enabled", "true") - - @mock.patch('crmsh.utils.package_is_installed') - def test_join_sbd_config_not_installed(self, mock_package): - mock_package.return_value = False - self.sbd_inst.join_sbd("alice", "node1") - mock_package.assert_called_once_with("sbd") - - @mock.patch('crmsh.bootstrap.invoke') - @mock.patch('os.path.exists') - @mock.patch('crmsh.utils.package_is_installed') - def test_join_sbd_config_not_exist(self, mock_package, mock_exists, mock_invoke): - mock_package.return_value = True + mock_package_is_installed.return_value = True + mock_get_sbd_device_from_config.return_value = [] + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + sbdmanager_instance._wants_to_overwrite = Mock() + sbdmanager_instance._prompt_for_sbd_device = Mock() + sbdmanager_instance._prompt_for_sbd_device.return_value = ['/dev/sbd_device'] + self.assertEqual(sbdmanager_instance.get_sbd_device_interactive(), ['/dev/sbd_device']) + mock_logger_info.assert_called_once_with(SBDManager.SBD_STATUS_DESCRIPTION) + sbdmanager_instance._wants_to_overwrite.assert_not_called() + 
sbdmanager_instance._prompt_for_sbd_device.assert_called_once() + + @patch('crmsh.sbd.SBDUtils.check_devices_metadata_consistent') + @patch('crmsh.bootstrap.confirm') + @patch('crmsh.sbd.ServiceManager') + def test_wants_to_overwrite_exception(self, mock_ServiceManager, mock_confirm, mock_check_devices_metadata_consistent): + sbdmanager_instance = SBDManager() + mock_confirm.return_value = False + mock_check_devices_metadata_consistent.return_value = False + with self.assertRaises(utils.TerminateSubCommand): + sbdmanager_instance._wants_to_overwrite(['/dev/sbd_device']) + + @patch('logging.Logger.warning') + def test_warn_and_raise_no_sbd(self, mock_logger_warning): + sbdmanager_instance = SBDManager() + with self.assertRaises(SBDManager.NotConfigSBD): + sbdmanager_instance._warn_and_raise_no_sbd() + mock_logger_warning.assert_called_once_with('%s', SBDManager.NO_SBD_WARNING) + + @patch('crmsh.sbd.ServiceManager') + @patch('crmsh.utils.get_quorum_votes_dict') + @patch('logging.Logger.warning') + def test_warn_diskless_sbd(self, mock_logger_warning, mock_get_quorum_votes_dict, mock_ServiceManager): + mock_ServiceManager.return_value.service_is_active = MagicMock(return_value=True) + mock_get_quorum_votes_dict.return_value = {'Expected': '2', 'Total': '2'} + sbdmanager_instance = SBDManager() + sbdmanager_instance._warn_diskless_sbd(peer="node1") + mock_logger_warning.assert_called_once_with('%s', SBDManager.DISKLESS_SBD_WARNING) + + @patch('crmsh.sbd.ServiceManager') + @patch('logging.Logger.warning') + def test_warn_diskless_sbd_init(self, mock_logger_warning, mock_ServiceManager): + mock_ServiceManager.return_value.service_is_active = MagicMock(return_value=False) + sbdmanager_instance = SBDManager(diskless_sbd=True) + sbdmanager_instance._warn_diskless_sbd() + mock_logger_warning.assert_called_once_with('%s', SBDManager.DISKLESS_SBD_WARNING) + + @patch('crmsh.sbd.SBDUtils.check_devices_metadata_consistent') + @patch('crmsh.bootstrap.confirm') + 
@patch('crmsh.sbd.ServiceManager') + def test_wants_to_overwrite_return_false(self, mock_ServiceManager, mock_confirm, mock_check_devices_metadata_consistent): + sbdmanager_instance = SBDManager() + mock_confirm.return_value = False + mock_check_devices_metadata_consistent.return_value = True + result = sbdmanager_instance._wants_to_overwrite(['/dev/sbd_device']) + self.assertFalse(result) + + @patch('crmsh.sbd.SBDUtils.check_devices_metadata_consistent') + @patch('crmsh.bootstrap.confirm') + @patch('crmsh.sbd.ServiceManager') + def test_wants_to_overwrite_return_true(self, mock_ServiceManager, mock_confirm, mock_check_devices_metadata_consistent): + mock_confirm.return_value = True + sbdmanager_instance = SBDManager() + result = sbdmanager_instance._wants_to_overwrite(['/dev/sbd_device']) + self.assertTrue(result) + + @patch('crmsh.sbd.SBDUtils.handle_input_sbd_devices') + @patch('crmsh.sbd.ServiceManager') + def test_get_sbd_device_from_bootstrap_return(self, mock_ServiceManager, mock_handle_input_sbd_devices): + mock_bootstrap_ctx = Mock(sbd_devices=[], diskless_sbd=False) + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + sbdmanager_instance.get_sbd_device_interactive = Mock() + sbdmanager_instance.get_sbd_device_interactive.return_value = [] + sbdmanager_instance.get_sbd_device_from_bootstrap() + mock_handle_input_sbd_devices.assert_not_called() + + @patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + @patch('crmsh.sbd.SBDUtils.handle_input_sbd_devices') + @patch('crmsh.sbd.ServiceManager') + def test_get_sbd_device_from_bootstrap(self, mock_ServiceManager, mock_handle_input_sbd_devices, mock_get_sbd_device_metadata): + mock_bootstrap_ctx = Mock(sbd_devices=['/dev/sda1', '/dev/sda2'], diskless_sbd=False) + mock_handle_input_sbd_devices.return_value = (['/dev/sda1'], ['/dev/sda2']) + mock_get_sbd_device_metadata.return_value = {'uuid': '1234-5678'} + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + 
sbdmanager_instance.get_sbd_device_from_bootstrap() + mock_handle_input_sbd_devices.assert_called_once_with(['/dev/sda1', '/dev/sda2']) + mock_get_sbd_device_metadata.assert_called_once_with('/dev/sda2', timeout_only=True) + + @patch('crmsh.sbd.ServiceManager') + def test_init_and_deploy_sbd_not_config_sbd(self, mock_ServiceManager): + mock_bootstrap_ctx = Mock() + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + sbdmanager_instance.get_sbd_device_from_bootstrap = Mock(side_effect=SBDManager.NotConfigSBD) + sbdmanager_instance._load_attributes_from_bootstrap = Mock() + sbdmanager_instance.init_and_deploy_sbd() + mock_ServiceManager.return_value.disable_service.assert_called_once_with(constants.SBD_SERVICE) + sbdmanager_instance._load_attributes_from_bootstrap.assert_not_called() + + @patch('crmsh.bootstrap.adjust_properties') + @patch('crmsh.sbd.SBDManager.restart_cluster_if_possible') + @patch('crmsh.sbd.SBDManager.enable_sbd_service') + @patch('crmsh.sbd.ServiceManager') + def test_init_and_deploy_sbd(self, mock_ServiceManager, mock_enable_sbd_service, mock_restart_cluster_if_possible, mock_adjust_properties): + mock_bootstrap_ctx = Mock(cluster_is_running=True) + sbdmanager_instance = SBDManager(bootstrap_context=mock_bootstrap_ctx) + sbdmanager_instance.get_sbd_device_from_bootstrap = Mock() + sbdmanager_instance._load_attributes_from_bootstrap = Mock() + sbdmanager_instance.initialize_sbd = Mock() + sbdmanager_instance.update_configuration = Mock() + sbdmanager_instance.configure_sbd = Mock() + sbdmanager_instance.init_and_deploy_sbd() + mock_ServiceManager.return_value.disable_service.assert_not_called() + mock_adjust_properties.assert_called_once() + + @patch('os.path.exists') + @patch('crmsh.sbd.ServiceManager') + def test_join_sbd_return(self, mock_ServiceManager, mock_exists): mock_exists.return_value = False - self.sbd_inst.join_sbd("alice", "node1") - mock_package.assert_called_once_with("sbd") - 
mock_exists.assert_called_once_with("/etc/sysconfig/sbd") - mock_invoke.assert_called_once_with("systemctl disable sbd.service") - - @mock.patch('crmsh.bootstrap.invoke') - @mock.patch('crmsh.service_manager.ServiceManager.service_is_enabled') - @mock.patch('os.path.exists') - @mock.patch('crmsh.utils.package_is_installed') - def test_join_sbd_config_disabled(self, mock_package, mock_exists, mock_enabled, mock_invoke): - mock_package.return_value = True + mock_ServiceManager.return_value.disable_service = Mock() + sbdmanager_instance = SBDManager() + sbdmanager_instance.join_sbd("remote_user", "peer_host") + mock_exists.assert_called_once_with(sbd.SBDManager.SYSCONFIG_SBD) + mock_ServiceManager.return_value.disable_service.assert_called_once_with(constants.SBD_SERVICE) + + @patch('logging.Logger.info') + @patch('crmsh.sbd.SBDUtils.verify_sbd_device') + @patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @patch('crmsh.watchdog.Watchdog') + @patch('os.path.exists') + @patch('crmsh.sbd.ServiceManager') + def test_join_sbd_diskbased(self, mock_ServiceManager, mock_exists, mock_Watchdog, mock_get_sbd_device_from_config, mock_verify_sbd_device, mock_logger_info): mock_exists.return_value = True - mock_enabled.return_value = False - - self.sbd_inst.join_sbd("alice", "node1") - - mock_package.assert_called_once_with("sbd") - mock_exists.assert_called_once_with("/etc/sysconfig/sbd") - mock_invoke.assert_called_once_with("systemctl disable sbd.service") - mock_enabled.assert_called_once_with("sbd.service", "node1") - - @mock.patch('logging.Logger.info') - @mock.patch('crmsh.sbd.SBDManager._verify_sbd_device') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.watchdog.Watchdog') - @mock.patch('crmsh.bootstrap.invoke') - @mock.patch('crmsh.service_manager.ServiceManager.service_is_enabled') - @mock.patch('os.path.exists') - @mock.patch('crmsh.utils.package_is_installed') - def test_join_sbd(self, mock_package, mock_exists, 
mock_enabled, mock_invoke, mock_watchdog, mock_get_device, mock_verify, mock_status): - mock_package.return_value = True - mock_exists.return_value = True - mock_enabled.return_value = True - mock_get_device.return_value = ["/dev/sdb1"] - mock_watchdog_inst = mock.Mock() - mock_watchdog.return_value = mock_watchdog_inst - mock_watchdog_inst.join_watchdog = mock.Mock() - - self.sbd_inst.join_sbd("alice", "node1") - - mock_package.assert_called_once_with("sbd") - mock_exists.assert_called_once_with("/etc/sysconfig/sbd") - mock_invoke.assert_called_once_with("systemctl enable sbd.service") - mock_get_device.assert_called_once_with() - mock_verify.assert_called_once_with(["/dev/sdb1"], ["node1"]) - mock_enabled.assert_called_once_with("sbd.service", "node1") - mock_status.assert_called_once_with("Got SBD configuration") - mock_watchdog.assert_called_once_with(remote_user="alice", peer_host="node1") - mock_watchdog_inst.join_watchdog.assert_called_once_with() - - @mock.patch('crmsh.utils.sysconfig_set') - @mock.patch('logging.Logger.info') - @mock.patch('crmsh.sbd.SBDManager._warn_diskless_sbd') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.watchdog.Watchdog') - @mock.patch('crmsh.bootstrap.invoke') - @mock.patch('crmsh.service_manager.ServiceManager.service_is_enabled') - @mock.patch('os.path.exists') - @mock.patch('crmsh.utils.package_is_installed') - def test_join_sbd_diskless(self, mock_package, mock_exists, mock_enabled, mock_invoke, mock_watchdog, mock_get_device, mock_warn, mock_status, mock_set): - mock_package.return_value = True + mock_ServiceManager.return_value.service_is_enabled.return_value = True + mock_Watchdog.return_value.join_watchdog = Mock() + mock_get_sbd_device_from_config.return_value = ['/dev/sbd_device'] + + sbdmanager_instance = SBDManager() + sbdmanager_instance.join_sbd("remote_user", "peer_host") + + mock_logger_info.assert_called_once_with("Got SBD configuration") + + @patch('logging.Logger.info') + 
@patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @patch('crmsh.watchdog.Watchdog') + @patch('os.path.exists') + @patch('crmsh.sbd.ServiceManager') + def test_join_sbd_diskless(self, mock_ServiceManager, mock_exists, mock_Watchdog, mock_get_sbd_device_from_config, mock_logger_info): mock_exists.return_value = True - mock_enabled.return_value = True - mock_get_device.return_value = [] - mock_watchdog_inst = mock.Mock() - mock_watchdog.return_value = mock_watchdog_inst - mock_watchdog_inst.join_watchdog = mock.Mock() - - self.sbd_inst.join_sbd("alice", "node1") - - mock_package.assert_called_once_with("sbd") - mock_exists.assert_called_once_with("/etc/sysconfig/sbd") - mock_invoke.assert_called_once_with("systemctl enable sbd.service") - mock_get_device.assert_called_once_with() - mock_warn.assert_called_once_with("node1") - mock_enabled.assert_called_once_with("sbd.service", "node1") - mock_status.assert_called_once_with("Got diskless SBD configuration") - mock_watchdog.assert_called_once_with(remote_user="alice", peer_host="node1") - mock_watchdog_inst.join_watchdog.assert_called_once_with() - - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - def test_verify_sbd_device_classmethod_exception(self, mock_get_config): - mock_get_config.return_value = [] - with self.assertRaises(ValueError) as err: - sbd.SBDManager.verify_sbd_device() - self.assertEqual("No sbd device configured", str(err.exception)) - mock_get_config.assert_called_once_with() - - @mock.patch('crmsh.sbd.SBDManager._verify_sbd_device') - @mock.patch('crmsh.utils.list_cluster_nodes_except_me') - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - def test_verify_sbd_device_classmethod(self, mock_get_config, mock_list_nodes, mock_verify): - mock_get_config.return_value = ["/dev/sda1"] - mock_list_nodes.return_value = ["node1"] - sbd.SBDManager.verify_sbd_device() - mock_get_config.assert_called_once_with() - mock_verify.assert_called_once_with(["/dev/sda1"], 
["node1"]) - - @mock.patch('crmsh.sbd.SBDManager._get_device_uuid') - def test_compare_device_uuid_return(self, mock_get_uuid): - self.sbd_inst._compare_device_uuid("/dev/sdb1", None) - mock_get_uuid.assert_not_called() - - @mock.patch('crmsh.sbd.SBDManager._get_device_uuid') - def test_compare_device_uuid(self, mock_get_uuid): - mock_get_uuid.side_effect = ["1234", "5678"] - with self.assertRaises(ValueError) as err: - self.sbd_inst._compare_device_uuid("/dev/sdb1", ["node1"]) - self.assertEqual("Device /dev/sdb1 doesn't have the same UUID with node1", str(err.exception)) - mock_get_uuid.assert_has_calls([mock.call("/dev/sdb1"), mock.call("/dev/sdb1", "node1")]) - - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - def test_get_device_uuid_not_match(self, mock_run): - mock_run.return_value = "data" - with self.assertRaises(ValueError) as err: - self.sbd_inst._get_device_uuid("/dev/sdb1") - self.assertEqual("Cannot find sbd device UUID for /dev/sdb1", str(err.exception)) - mock_run.assert_called_once_with("sbd -d /dev/sdb1 dump", None) - - @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') - def test_get_device_uuid(self, mock_run): - output = """ - ==Dumping header on disk /dev/sda1 - Header version : 2.1 - UUID : a2e9a92c-cc72-4ef9-ac55-ccc342f3546b - Number of slots : 255 - Sector size : 512 - Timeout (watchdog) : 5 - Timeout (allocate) : 2 - Timeout (loop) : 1 - Timeout (msgwait) : 10 - ==Header on disk /dev/sda1 is dumped + mock_ServiceManager.return_value.service_is_enabled.return_value = True + mock_Watchdog.return_value.join_watchdog = Mock() + mock_get_sbd_device_from_config.return_value = [] + + sbdmanager_instance = SBDManager() + sbdmanager_instance._warn_diskless_sbd = Mock() + sbdmanager_instance.join_sbd("remote_user", "peer_host") + + mock_logger_info.assert_called_once_with("Got diskless SBD configuration") + + @patch('crmsh.sbd.SBDManager.update_configuration') + def test_update_sbd_configuration(self, 
mock_update_configuration): + SBDManager.update_sbd_configuration({'key': 'value'}) + mock_update_configuration.assert_called_once() + + @patch('crmsh.utils.sysconfig_set') + @patch('crmsh.sbd.ServiceManager') + def test_update_configuration_return(self, mock_ServiceManager, mock_sysconfig_set): + sbdmanager_instance = SBDManager() + sbdmanager_instance.update_configuration() + mock_sysconfig_set.assert_not_called() + + @patch('crmsh.bootstrap.sync_file') + @patch('crmsh.utils.sysconfig_set') + @patch('logging.Logger.info') + @patch('crmsh.utils.copy_local_file') + @patch('crmsh.sbd.ServiceManager') + def test_update_configuration(self, mock_ServiceManager, mock_copy_local_file, mock_logger_info, mock_sysconfig_set, mock_sync_file): + sbdmanager_instance = SBDManager(update_dict={'key': 'value'}) + sbdmanager_instance.overwrite_sysconfig = True + sbdmanager_instance.update_configuration() + mock_logger_info.assert_has_calls([ + call("Update %s in %s: %s", 'key', sbd.SBDManager.SYSCONFIG_SBD, 'value'), + call('Already synced %s to all nodes', sbd.SBDManager.SYSCONFIG_SBD) + ]) + + @patch('logging.Logger.info') + def test_initialize_sbd_diskless(self, mock_logger_info): + sbdmanager_instance = SBDManager(diskless_sbd=True) + sbdmanager_instance._warn_diskless_sbd = Mock() + sbdmanager_instance.initialize_sbd() + mock_logger_info.assert_called_once_with("Configuring diskless SBD") + + @patch('crmsh.sbd.ServiceManager') + @patch('logging.Logger.debug') + @patch('crmsh.sbd.sh.cluster_shell') + @patch('crmsh.sbd.SBDManager.convert_timeout_dict_to_opt_str') + @patch('logging.Logger.info') + def test_initialize_sbd_diskbased(self, mock_logger_info, mock_convert_timeout_dict_to_opt_str, mock_cluster_shell, mock_logger_debug, mock_ServiceManager): + sbdmanager_instance = SBDManager(device_list_to_init=['/dev/sbd_device'], timeout_dict={'watchdog': 5, 'msgwait': 10}) + sbdmanager_instance.initialize_sbd() + mock_logger_info.assert_has_calls([ + call("Configuring disk-based 
SBD"), + call("Initializing SBD device %s", '/dev/sbd_device') + ]) + + @patch('crmsh.sbd.SBDManager.convert_timeout_dict_to_opt_str') + def test_initialize_sbd_return(self, mock_convert_timeout_dict_to_opt_str): + sbdmanager_instance = SBDManager() + sbdmanager_instance.initialize_sbd() + mock_convert_timeout_dict_to_opt_str.assert_not_called() + + @patch('crmsh.utils.set_property') + @patch('crmsh.sbd.ServiceManager') + def test_configure_sbd_diskless(self, mock_ServiceManager, mock_set_property): + sbdmanager_instance = SBDManager(diskless_sbd=True) + sbdmanager_instance.configure_sbd() + mock_set_property.assert_has_calls([ + call("stonith-watchdog-timeout", sbd.SBDTimeout.STONITH_WATCHDOG_TIMEOUT_DEFAULT), + call("stonith-enabled", "true") + ]) + + @patch('crmsh.utils.delete_property') + @patch('crmsh.utils.get_property') + @patch('crmsh.sbd.sh.cluster_shell') + @patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @patch('crmsh.sbd.xmlutil.CrmMonXmlParser') + @patch('crmsh.utils.set_property') + @patch('crmsh.sbd.ServiceManager') + def test_configure_sbd(self, mock_ServiceManager, mock_set_property, mock_CrmMonXmlParser, mock_get_sbd_device_from_config, mock_cluster_shell, mock_get_property, mock_delete_property): + mock_get_property.return_value = -1 + mock_CrmMonXmlParser.return_value.is_resource_configured.return_value = False + mock_get_sbd_device_from_config.return_value = ['/dev/sbd_device'] + mock_cluster_shell.return_value.get_stdout_or_raise_error.return_value = "data" + sbdmanager_instance = SBDManager() + sbdmanager_instance.configure_sbd() + mock_cluster_shell.return_value.get_stdout_or_raise_error.assert_called_once_with("crm configure primitive stonith-sbd stonith:fence_sbd params devices=\"/dev/sbd_device\"") + + +class TestOutterFunctions(unittest.TestCase): + """ + Unitary tests for crmsh.sbd outter functions + """ + @patch('crmsh.utils.ext_cmd') + @patch('logging.Logger.info') + @patch('crmsh.sbd.xmlutil.CrmMonXmlParser') + def 
test_cleanup_existing_sbd_resource(self, mock_CrmMonXmlParser, mock_logger_info, mock_ext_cmd): + mock_CrmMonXmlParser.return_value.is_resource_configured.return_value = True + mock_CrmMonXmlParser.return_value.get_resource_id_list_via_type.return_value = ['sbd_resource'] + mock_CrmMonXmlParser.return_value.is_resource_started.return_value = True + sbd.cleanup_existing_sbd_resource() + mock_logger_info.assert_has_calls([ + call("Stop sbd resource '%s'(%s)", 'sbd_resource', sbd.SBDManager.SBD_RA), + call("Remove sbd resource '%s'", 'sbd_resource') + ]) + + @patch('crmsh.parallax.parallax_call') + @patch('crmsh.utils.cleanup_stonith_related_properties') + @patch('crmsh.sbd.sh.cluster_shell') + @patch('crmsh.utils.cluster_run_cmd') + @patch('logging.Logger.info') + @patch('crmsh.sbd.ServiceManager') + @patch('crmsh.utils.list_cluster_nodes') + @patch('crmsh.sbd.cleanup_existing_sbd_resource') + def test_purge_sbd_from_cluster(self, mock_cleanup_existing_sbd_resource, mock_list_cluster_nodes, mock_ServiceManager, mock_logger_info, mock_cluster_run_cmd, mock_cluster_shell, mock_cleanup_stonith_related_properties, mock_parallax_call): + mock_list_cluster_nodes.return_value = ['node1', 'node2'] + mock_ServiceManager.return_value.service_is_enabled.side_effect = [True, True] + stonith_data = """stonith-sbd +1 fence device found """ - mock_run.return_value = output - res = self.sbd_inst._get_device_uuid("/dev/sda1", node="node1") - self.assertEqual(res, "a2e9a92c-cc72-4ef9-ac55-ccc342f3546b") - mock_run.assert_called_once_with("sbd -d /dev/sda1 dump", "node1") - - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.service_manager.ServiceManager.service_is_active') - @mock.patch('crmsh.bootstrap.Context') - def test_is_using_diskless_sbd_true(self, mock_context, mock_is_active, mock_get_sbd): - context_inst = mock.Mock() - mock_context.return_value = context_inst - mock_get_sbd.return_value = [] - mock_is_active.return_value = True - assert 
sbd.SBDManager.is_using_diskless_sbd() is True - mock_context.assert_called_once_with() - mock_get_sbd.assert_called_once_with() - mock_is_active.assert_called_once_with("sbd.service") - - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.bootstrap.Context') - def test_is_using_diskless_sbd_false(self, mock_context, mock_get_sbd): - context_inst = mock.Mock() - mock_context.return_value = context_inst - mock_get_sbd.return_value = ["/dev/sda1"] - assert sbd.SBDManager.is_using_diskless_sbd() is False - mock_context.assert_called_once_with() - mock_get_sbd.assert_called_once_with() - - @mock.patch('crmsh.sbd.SBDManager._get_sbd_device_from_config') - @mock.patch('crmsh.bootstrap.Context') - def test_get_sbd_device_from_config_classmethod(self, mock_context, mock_get_sbd): - context_inst = mock.Mock() - mock_context.return_value = context_inst - mock_get_sbd.return_value = ["/dev/sda1"] - assert sbd.SBDManager.get_sbd_device_from_config() == ["/dev/sda1"] - mock_context.assert_called_once_with() - mock_get_sbd.assert_called_once_with() - - @mock.patch('crmsh.bootstrap.sync_file') - @mock.patch('crmsh.utils.sysconfig_set') - def test_update_configuration_static(self, mock_config_set, mock_csync2): - sbd_config_dict = { - "SBD_PACEMAKER": "yes", - "SBD_STARTMODE": "always", - "SBD_DELAY_START": "no", - } - self.sbd_inst.update_configuration(sbd_config_dict) - mock_config_set.assert_called_once_with(bootstrap.SYSCONFIG_SBD, **sbd_config_dict) - mock_csync2.assert_called_once_with(bootstrap.SYSCONFIG_SBD) + mock_cluster_shell.return_value.get_stdout_or_raise_error.return_value = stonith_data + sbd.purge_sbd_from_cluster() + mock_logger_info.assert_has_calls([ + call("Disable %s on node %s", constants.SBD_SERVICE, 'node1'), + call("Disable %s on node %s", constants.SBD_SERVICE, 'node2'), + call("Move %s to %s on all nodes", sbd.SBDManager.SYSCONFIG_SBD, sbd.SBDManager.SYSCONFIG_SBD+'.bak') + ]) + 
mock_cleanup_stonith_related_properties.assert_called_once() diff --git a/test/unittests/test_ui_sbd.py b/test/unittests/test_ui_sbd.py new file mode 100644 index 0000000000..196f6ed14f --- /dev/null +++ b/test/unittests/test_ui_sbd.py @@ -0,0 +1,580 @@ +import io +import unittest +from unittest import mock + +from crmsh import ui_sbd +from crmsh import constants +from crmsh import sbd + + +class TestOutterFunctions(unittest.TestCase): + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_sbd_device_completer_return_no_diskbased_sbd(self, mock_is_using_disk_based_sbd): + mock_is_using_disk_based_sbd.return_value = False + self.assertEqual(ui_sbd.sbd_device_completer([]), []) + mock_is_using_disk_based_sbd.assert_called_once() + + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_sbd_device_completer_return_options(self, mock_is_using_disk_based_sbd): + mock_is_using_disk_based_sbd.return_value = True + self.assertEqual(ui_sbd.sbd_device_completer(["device", ""]), ["add", "remove"]) + mock_is_using_disk_based_sbd.assert_called_once() + + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_sbd_device_completer_return_no_options(self, mock_is_using_disk_based_sbd): + mock_is_using_disk_based_sbd.return_value = True + self.assertEqual(ui_sbd.sbd_device_completer(["device", "add", "/dev/sda1"]), []) + mock_is_using_disk_based_sbd.assert_called_once() + + @mock.patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_sbd_device_completer_return_no_last_dev(self, mock_is_using_disk_based_sbd, mock_get_sbd_device_from_config): + mock_is_using_disk_based_sbd.return_value = True + mock_get_sbd_device_from_config.return_value = ["/dev/sda1", "/dev/sda2"] + self.assertEqual(ui_sbd.sbd_device_completer(["device", "remove", "/dev/sda1"]), []) + mock_is_using_disk_based_sbd.assert_called_once() + mock_get_sbd_device_from_config.assert_called_once() + + 
@mock.patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_sbd_device_completer(self, mock_is_using_disk_based_sbd, mock_get_sbd_device_from_config): + mock_is_using_disk_based_sbd.return_value = True + mock_get_sbd_device_from_config.return_value = ["/dev/sda1", "/dev/sda2"] + self.assertEqual(ui_sbd.sbd_device_completer(["device", "remove", "/dev"]), mock_get_sbd_device_from_config.return_value) + mock_is_using_disk_based_sbd.assert_called_once() + mock_get_sbd_device_from_config.assert_called_once() + + @mock.patch('crmsh.ui_sbd.ServiceManager') + def test_sbd_configure_completer_return(self, mock_ServiceManager): + mock_ServiceManager.return_value.service_is_active.side_effect = [True, False] + self.assertEqual(ui_sbd.sbd_configure_completer([]), []) + mock_ServiceManager.return_value.service_is_active.assert_has_calls([ + mock.call(constants.PCMK_SERVICE), + mock.call(constants.SBD_SERVICE) + ]) + + @mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + @mock.patch('crmsh.ui_sbd.ServiceManager') + def test_sbd_configure_completer_show_return(self, mock_ServiceManager, mock_is_using_disk_based_sbd, mock_is_using_diskless_sbd): + mock_ServiceManager.return_value.service_is_active.side_effect = [True, True] + mock_is_using_disk_based_sbd.return_value = True + mock_is_using_diskless_sbd.return_value = False + self.assertEqual(ui_sbd.sbd_configure_completer(["configure", "show", ""]), list(ui_sbd.SBD.SHOW_TYPES)) + mock_ServiceManager.return_value.service_is_active.assert_has_calls([ + mock.call(constants.PCMK_SERVICE), + mock.call(constants.SBD_SERVICE) + ]) + mock_is_using_disk_based_sbd.assert_called_once() + mock_is_using_diskless_sbd.assert_called_once() + + @mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + @mock.patch('crmsh.ui_sbd.ServiceManager') + def 
test_sbd_configure_completer_show_return_empty(self, mock_ServiceManager, mock_is_using_disk_based_sbd, mock_is_using_diskless_sbd): + mock_ServiceManager.return_value.service_is_active.side_effect = [True, True] + mock_is_using_disk_based_sbd.return_value = True + mock_is_using_diskless_sbd.return_value = False + self.assertEqual(ui_sbd.sbd_configure_completer(["configure", "show", "xx", ""]), []) + mock_ServiceManager.return_value.service_is_active.assert_has_calls([ + mock.call(constants.PCMK_SERVICE), + mock.call(constants.SBD_SERVICE) + ]) + mock_is_using_disk_based_sbd.assert_called_once() + mock_is_using_diskless_sbd.assert_called_once() + + @mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + @mock.patch('crmsh.ui_sbd.ServiceManager') + def test_sbd_configure_completer_success(self, mock_ServiceManager, mock_is_using_disk_based_sbd, mock_is_using_diskless_sbd): + mock_ServiceManager.return_value.service_is_active.side_effect = [True, True] + mock_is_using_disk_based_sbd.return_value = False + mock_is_using_diskless_sbd.return_value = True + self.assertEqual(ui_sbd.sbd_configure_completer(["configure", ""]), ["show", "watchdog-timeout=", "watchdog-device="]) + mock_ServiceManager.return_value.service_is_active.assert_has_calls([ + mock.call(constants.PCMK_SERVICE), + mock.call(constants.SBD_SERVICE) + ]) + mock_is_using_disk_based_sbd.assert_called_once() + mock_is_using_diskless_sbd.assert_called_once() + + +class TestSBD(unittest.TestCase): + + @mock.patch('crmsh.utils.node_reachable_check') + @mock.patch('crmsh.utils.list_cluster_nodes') + @mock.patch('crmsh.ui_sbd.sh.cluster_shell') + @mock.patch('crmsh.ui_sbd.ServiceManager') + @mock.patch('crmsh.watchdog.Watchdog.get_watchdog_device_from_sbd_config') + @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_watchdog_timeout') + @mock.patch('crmsh.sbd.SBDUtils.get_sbd_device_metadata') + @mock.patch('crmsh.sbd.SBDUtils.get_sbd_device_from_config') + def 
setUp(self, mock_get_sbd_device_from_config, mock_get_sbd_device_metadata, mock_get_sbd_watchdog_timeout, mock_get_watchdog_device_from_sbd_config, mock_ServiceManager, mock_cluster_shell, mock_list_cluster_nodes, mock_node_reachable_check): + + mock_list_cluster_nodes.return_value = ["node1", "node2"] + mock_get_sbd_device_from_config.return_value = ["/dev/sda1"] + mock_get_watchdog_device_from_sbd_config.return_value = "/dev/watchdog0" + mock_get_sbd_watchdog_timeout.return_value = 10 + mock_get_sbd_device_metadata.return_value = {"watchdog": 10, "msgwait": 20} + self.sbd_instance_diskbased = ui_sbd.SBD() + self.sbd_instance_diskbased._load_attributes() + + mock_get_sbd_device_from_config.return_value = [] + self.sbd_instance_diskless = ui_sbd.SBD() + self.sbd_instance_diskless._load_attributes() + + @mock.patch('logging.Logger.error') + @mock.patch('crmsh.utils.package_is_installed') + def test_requires(self, mock_package_is_installed, mock_logger_error): + mock_package_is_installed.return_value = False + self.assertFalse(self.sbd_instance_diskbased.requires()) + mock_package_is_installed.assert_called_with("sbd") + mock_package_is_installed.return_value = True + self.assertTrue(self.sbd_instance_diskbased.requires()) + mock_package_is_installed.assert_called_with("sbd") + + @mock.patch('logging.Logger.error') + def test_service_is_active_false(self, mock_logger_error): + self.sbd_instance_diskbased.service_manager.service_is_active = mock.Mock(return_value=False) + self.assertFalse(self.sbd_instance_diskbased.service_is_active(constants.PCMK_SERVICE)) + mock_logger_error.assert_called_once_with("%s is not active", constants.PCMK_SERVICE) + + @mock.patch('logging.Logger.error') + def test_service_is_active_true(self, mock_logger_error): + self.sbd_instance_diskbased.service_manager.service_is_active = mock.Mock(return_value=True) + self.assertTrue(self.sbd_instance_diskbased.service_is_active(constants.PCMK_SERVICE)) + mock_logger_error.assert_not_called() + + 
@mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_configure_usage_none(self, mock_is_using_disk_based_sbd, mock_is_using_diskless_sbd): + mock_is_using_disk_based_sbd.return_value = False + mock_is_using_diskless_sbd.return_value = False + self.assertEqual(self.sbd_instance_diskbased.configure_usage, "") + mock_is_using_disk_based_sbd.assert_called_once() + mock_is_using_diskless_sbd.assert_called_once() + + @mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_configure_usage_disk_diskbased(self, mock_is_using_disk_based_sbd, mock_is_using_diskless_sbd): + mock_is_using_disk_based_sbd.return_value = True + timeout_usage_str = " ".join([f"[{t}-timeout=]" for t in ui_sbd.SBD.TIMEOUT_TYPES]) + show_usage = f"crm sbd configure show [{'|'.join(ui_sbd.SBD.SHOW_TYPES)}]" + expected = f"Usage:\n{show_usage}\ncrm sbd configure {timeout_usage_str} [watchdog-device=]\n" + self.assertEqual(self.sbd_instance_diskbased.configure_usage, expected) + mock_is_using_disk_based_sbd.assert_called_once() + mock_is_using_diskless_sbd.assert_not_called() + + @mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_configure_usage_disk_diskless(self, mock_is_using_disk_based_sbd, mock_is_using_diskless_sbd): + mock_is_using_disk_based_sbd.return_value = False + mock_is_using_diskless_sbd.return_value = True + timeout_usage_str = " ".join([f"[{t}-timeout=]" for t in ui_sbd.SBD.DISKLESS_TIMEOUT_TYPES]) + show_usage = f"crm sbd configure show [{'|'.join(ui_sbd.SBD.DISKLESS_SHOW_TYPES)}]" + expected = f"Usage:\n{show_usage}\ncrm sbd configure {timeout_usage_str} [watchdog-device=]\n" + self.assertEqual(self.sbd_instance_diskless.configure_usage, expected) + mock_is_using_disk_based_sbd.assert_called_once() + mock_is_using_diskless_sbd.assert_called_once() + + 
@mock.patch('logging.Logger.info') + @mock.patch('builtins.open', new_callable=mock.mock_open, read_data="# Comment line\nKEY1=value1\nKEY2=value2\n") + def test_show_sysconfig(self, mock_open, mock_logger_info): + with mock.patch('sys.stdout', new_callable=io.StringIO) as mock_stdout: + self.sbd_instance_diskbased._show_sysconfig() + self.assertTrue(mock_logger_info.called) + mock_logger_info.assert_called_with("crm sbd configure show sysconfig") + self.assertEqual(mock_stdout.getvalue(), "KEY1=value1\nKEY2=value2\n") + + @mock.patch('logging.Logger.info') + def test_show_disk_metadata(self, mock_logger_info): + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error.return_value = "disk metadata: data" + with mock.patch('sys.stdout', new_callable=io.StringIO) as mock_stdout: + self.sbd_instance_diskbased._show_disk_metadata() + self.assertTrue(mock_logger_info.called) + mock_logger_info.assert_called_with("crm sbd configure show disk_metadata") + self.assertEqual(mock_stdout.getvalue(), "disk metadata: data\n\n") + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error.assert_called_with("sbd -d /dev/sda1 dump") + + def test_do_configure_no_service(self): + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=False) + res = self.sbd_instance_diskbased.do_configure(mock.Mock()) + self.assertFalse(res) + + @mock.patch('crmsh.sbd.SBDTimeout.get_sbd_systemd_start_timeout') + @mock.patch('logging.Logger.info') + @mock.patch('builtins.print') + def test_show_property(self, mock_print, mock_logger_info, mock_get_sbd_systemd_start_timeout): + data = """property cib-bootstrap-options: \ + have-watchdog=true \ + dc-version="2.1.7+20240711.239cba384-1.1-2.1.7+20240711.239cba384" \ + cluster-infrastructure=corosync \ + cluster-name=hacluster \ + stonith-enabled=true \ + stonith-timeout=83 \ + priority-fencing-delay=60 + """ + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error = mock.Mock(return_value=data) + 
mock_get_sbd_systemd_start_timeout.return_value = 10 + self.sbd_instance_diskbased._show_property() + mock_logger_info.assert_has_calls([ + mock.call("crm sbd configure show property"), + mock.call("%s", sbd.SBDTimeout.SHOW_SBD_START_TIMEOUT_CMD) + ]) + mock_print.assert_has_calls([ + mock.call("have-watchdog=true"), + mock.call("stonith-enabled=true"), + mock.call("stonith-timeout=83"), + mock.call("priority-fencing-delay=60"), + mock.call(), + mock.call(f"TimeoutStartUSec=10") + ]) + + def test_configure_show_invalid_arg(self): + with self.assertRaises(ui_sbd.SBD.SyntaxError) as e: + res = self.sbd_instance_diskbased._configure_show(["arg1", "arg2", "arg3"]) + self.assertEqual(str(e.exception), "Invalid argument") + + def test_configure_show_unknown_arg(self): + with self.assertRaises(ui_sbd.SBD.SyntaxError) as e: + res = self.sbd_instance_diskbased._configure_show(["xxx1", "xxx2"]) + self.assertEqual(str(e.exception), f"Unknown argument: xxx2") + + def test_configure_show_disk_metadata(self): + self.sbd_instance_diskbased._show_disk_metadata = mock.Mock() + self.sbd_instance_diskbased._configure_show(["show", "disk_metadata"]) + self.sbd_instance_diskbased._show_disk_metadata.assert_called_once() + + @mock.patch('crmsh.ui_sbd.SBD._show_sysconfig') + def test_configure_show_sysconfig(self, mock_show_sysconfig): + self.sbd_instance_diskbased._configure_show(["show", "sysconfig"]) + mock_show_sysconfig.assert_called_once() + + def test_configure_show_property(self): + self.sbd_instance_diskbased._show_property = mock.Mock() + self.sbd_instance_diskbased._configure_show(["show", "property"]) + self.sbd_instance_diskbased._show_property.assert_called_once() + + @mock.patch('crmsh.ui_sbd.SBD._show_sysconfig') + @mock.patch('builtins.print') + def test_configure_show(self, mock_print, mock_show_sysconfig): + self.sbd_instance_diskbased._show_disk_metadata = mock.Mock() + self.sbd_instance_diskbased._show_property = mock.Mock() + 
self.sbd_instance_diskbased._configure_show(["show"]) + mock_print.assert_has_calls([mock.call(), mock.call()]) + + def test_parse_args_invalid_args(self): + with self.assertRaises(ui_sbd.SBD.SyntaxError) as e: + self.sbd_instance_diskbased._parse_args(["arg1"]) + self.assertEqual(str(e.exception), "Invalid argument: arg1") + + def test_parse_args_invalid_timeout_value(self): + with self.assertRaises(ui_sbd.SBD.SyntaxError) as e: + self.sbd_instance_diskbased._parse_args(["watchdog-timeout=xxx"]) + self.assertEqual(str(e.exception), "Invalid timeout value: xxx") + + def test_parse_args_unknown_arg(self): + with self.assertRaises(ui_sbd.SBD.SyntaxError) as e: + self.sbd_instance_diskbased._parse_args(["name=xin"]) + self.assertEqual(str(e.exception), "Unknown argument: name=xin") + + @mock.patch('logging.Logger.debug') + @mock.patch('crmsh.watchdog.Watchdog.get_watchdog_device') + def test_parse_args(self, mock_get_watchdog_device, mock_logger_debug): + mock_get_watchdog_device.return_value = "/dev/watchdog0" + args = self.sbd_instance_diskbased._parse_args(["watchdog-timeout=10", "watchdog-device=/dev/watchdog0"]) + self.assertEqual(args, {"watchdog": 10, "watchdog-device": "/dev/watchdog0"}) + + @mock.patch('logging.Logger.warning') + @mock.patch('logging.Logger.info') + def test_adjust_timeout_dict(self, mock_logger_info, mock_logger_warning): + timeout_dict = {"watchdog": 10, "msgwait": 10} + res = ui_sbd.SBD._adjust_timeout_dict(timeout_dict) + self.assertEqual(res, timeout_dict) + mock_logger_warning.assert_called_once_with("It's recommended to set msgwait timeout >= 2*watchdog timeout") + timeout_dict = {"watchdog": 10} + res = ui_sbd.SBD._adjust_timeout_dict(timeout_dict) + self.assertEqual(res, {"watchdog": 10, "msgwait": 20}) + timeout_dict = {"msgwait": 10} + res = ui_sbd.SBD._adjust_timeout_dict(timeout_dict) + self.assertEqual(res, {"watchdog": 5, "msgwait": 10}) + + @mock.patch("crmsh.ui_sbd.SBD.configure_usage", new_callable=mock.PropertyMock) + 
@mock.patch('builtins.print') + @mock.patch('logging.Logger.error') + def test_do_configure_no_args(self, mock_logger_error, mock_print, mock_configure_usage): + self.sbd_instance_diskbased.service_is_active = mock.Mock(side_effect=[True, True]) + mock_configure_usage.return_value = "usage data" + res = self.sbd_instance_diskbased.do_configure(mock.Mock()) + self.assertFalse(res) + mock_logger_error.assert_called_once_with('%s', "No argument") + mock_print.assert_called_once_with("usage data") + + @mock.patch('crmsh.sbd.SBDManager') + def test_configure_diskbase(self, mock_SBDManager): + parameter_dict = {"watchdog": 12, "watchdog-device": "/dev/watchdog100"} + self.sbd_instance_diskbased._adjust_timeout_dict = mock.Mock(return_value=parameter_dict) + mock_SBDManager.return_value.init_and_deploy_sbd = mock.Mock() + self.sbd_instance_diskbased._configure_diskbase(parameter_dict) + mock_SBDManager.assert_called_once_with( + device_list_to_init=self.sbd_instance_diskbased.device_list_from_config, + timeout_dict={"watchdog": 12, "msgwait": 20, "watchdog-device": "/dev/watchdog100"}, + update_dict={ + "SBD_WATCHDOG_DEV": "/dev/watchdog100" + } + ) + mock_SBDManager.return_value.init_and_deploy_sbd.assert_called_once() + + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.utils.is_subdict') + @mock.patch('crmsh.sbd.SBDManager') + def test_configure_diskbase_no_change(self, mock_SBDManager, mock_is_subdict, mock_logger_info): + parameter_dict = {"watchdog": 10, "watchdog-device": "/dev/watchdog0"} + mock_is_subdict.return_value = True + self.sbd_instance_diskbased._configure_diskbase(parameter_dict) + mock_logger_info.assert_called_once_with("No change in SBD configuration") + + @mock.patch('crmsh.sbd.SBDManager') + def test_configure_diskless(self, mock_SBDManager): + parameter_dict = {"watchdog": 12, "watchdog-device": "/dev/watchdog100"} + self.sbd_instance_diskless._adjust_timeout_dict = mock.Mock(return_value=parameter_dict) + 
mock_SBDManager.return_value.init_and_deploy_sbd = mock.Mock() + self.sbd_instance_diskless._configure_diskless(parameter_dict) + mock_SBDManager.assert_called_once_with( + update_dict={ + "SBD_WATCHDOG_DEV": "/dev/watchdog100", + "SBD_WATCHDOG_TIMEOUT": "12" + }, + diskless_sbd=True + ) + mock_SBDManager.return_value.init_and_deploy_sbd.assert_called_once() + + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.sbd.SBDManager') + def test_configure_diskless_no_change(self, mock_SBDManager, mock_logger_info): + parameter_dict = {"watchdog": 10, "watchdog-device": "/dev/watchdog0"} + self.sbd_instance_diskless._configure_diskless(parameter_dict) + mock_logger_info.assert_called_once_with("No change in SBD configuration") + + @mock.patch('crmsh.sbd.SBDManager') + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.sbd.SBDUtils.handle_input_sbd_devices') + @mock.patch('crmsh.sbd.SBDUtils.verify_sbd_device') + def test_device_add(self, mock_verify_sbd_device, mock_handle_input_sbd_devices, mock_logger_info, mock_SBDManager): + mock_handle_input_sbd_devices.return_value = [["/dev/sda2"], ["/dev/sda1"]] + mock_SBDManager.return_value.init_and_deploy_sbd = mock.Mock() + self.sbd_instance_diskbased._device_add(["/dev/sda2"]) + mock_verify_sbd_device.assert_called_once_with(["/dev/sda1", "/dev/sda2"]) + mock_handle_input_sbd_devices.assert_called_once_with(["/dev/sda2"], ["/dev/sda1"]) + mock_SBDManager.assert_called_once_with( + device_list_to_init=["/dev/sda2"], + update_dict={"SBD_DEVICE": "/dev/sda1;/dev/sda2"}, + timeout_dict=self.sbd_instance_diskbased.device_meta_dict_runtime + ) + mock_logger_info.assert_called_once_with("Append devices: %s", "/dev/sda2") + + def test_device_remove_dev_not_in_config(self): + with self.assertRaises(ui_sbd.SBD.SyntaxError) as e: + self.sbd_instance_diskbased._device_remove(["/dev/sda2"]) + self.assertEqual(str(e.exception), "Device /dev/sda2 is not in config") + + def test_device_remove_last_dev(self): + with 
self.assertRaises(ui_sbd.SBD.SyntaxError) as e: + self.sbd_instance_diskbased._device_remove(["/dev/sda1"]) + self.assertEqual(str(e.exception), "Not allowed to remove all devices") + + @mock.patch('crmsh.sbd.SBDManager.restart_cluster_if_possible') + @mock.patch('crmsh.sbd.SBDManager.update_sbd_configuration') + @mock.patch('logging.Logger.info') + def test_device_remove(self, mock_logger_info, mock_update_sbd_configuration, mock_restart_cluster_if_possible): + self.sbd_instance_diskbased.device_list_from_config = ["/dev/sda1", "/dev/sda2"] + self.sbd_instance_diskbased._device_remove(["/dev/sda1"]) + mock_update_sbd_configuration.assert_called_once_with({"SBD_DEVICE": "/dev/sda2"}) + mock_restart_cluster_if_possible.assert_called_once() + mock_logger_info.assert_called_once_with("Remove devices: %s", "/dev/sda1") + + def test_do_device_no_service(self): + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=False) + res = self.sbd_instance_diskbased.do_device(mock.Mock()) + self.assertFalse(res) + + @mock.patch('logging.Logger.info') + @mock.patch('logging.Logger.error') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_do_device_no_diskbase(self, mock_is_using_disk_based_sbd, mock_logger_error, mock_logger_info): + mock_is_using_disk_based_sbd.return_value = False + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + res = self.sbd_instance_diskbased.do_device(mock.Mock()) + self.assertFalse(res) + mock_logger_error.assert_called_once_with("Only works for disk-based SBD") + mock_logger_info.assert_called_once_with("Please use 'crm cluster init sbd -s [-s [-s ]]' to configure the disk-based SBD first") + + @mock.patch('logging.Logger.error') + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_do_device_no_args(self, mock_is_using_disk_based_sbd, mock_logger_info, mock_logger_error): + mock_is_using_disk_based_sbd.return_value = True + 
self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + res = self.sbd_instance_diskbased.do_device(mock.Mock()) + self.assertFalse(res) + mock_logger_error.assert_called_once_with('%s', "No argument") + mock_logger_info.assert_called_once_with("Usage: crm sbd device ...") + + @mock.patch('logging.Logger.error') + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_do_device_invalid_args(self, mock_is_using_disk_based_sbd, mock_logger_info, mock_logger_error): + mock_is_using_disk_based_sbd.return_value = True + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + res = self.sbd_instance_diskbased.do_device(mock.Mock(), "arg1") + self.assertFalse(res) + mock_logger_error.assert_called_once_with('%s', "Invalid argument: arg1") + mock_logger_info.assert_called_once_with("Usage: crm sbd device ...") + + @mock.patch('logging.Logger.error') + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_do_device_no_device(self, mock_is_using_disk_based_sbd, mock_logger_info, mock_logger_error): + mock_is_using_disk_based_sbd.return_value = True + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + res = self.sbd_instance_diskbased.do_device(mock.Mock(), "add") + self.assertFalse(res) + mock_logger_error.assert_called_once_with('%s', "No device specified") + mock_logger_info.assert_called_once_with("Usage: crm sbd device ...") + + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_do_device_add(self, mock_is_using_disk_based_sbd, mock_logger_info): + mock_is_using_disk_based_sbd.return_value = True + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + self.sbd_instance_diskbased._device_add = mock.Mock() + res = self.sbd_instance_diskbased.do_device(mock.Mock(), "add", "/dev/sda2;/dev/sda3") + self.assertTrue(res) + 
self.sbd_instance_diskbased._device_add.assert_called_once_with(["/dev/sda2", "/dev/sda3"]) + mock_logger_info.assert_called_once_with("Configured sbd devices: %s", "/dev/sda1") + + @mock.patch('logging.Logger.info') + @mock.patch('crmsh.sbd.SBDUtils.is_using_disk_based_sbd') + def test_do_device_remove(self, mock_is_using_disk_based_sbd, mock_logger_info): + mock_is_using_disk_based_sbd.return_value = True + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + self.sbd_instance_diskbased._device_remove = mock.Mock() + res = self.sbd_instance_diskbased.do_device(mock.Mock(), "remove", "/dev/sda1") + self.assertTrue(res) + self.sbd_instance_diskbased._device_remove.assert_called_once_with(["/dev/sda1"]) + mock_logger_info.assert_called_once_with("Configured sbd devices: %s", "/dev/sda1") + + @mock.patch('crmsh.sbd.purge_sbd_from_cluster') + def test_do_purge_no_service(self, mock_purge_sbd_from_cluster): + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=False) + res = self.sbd_instance_diskbased.do_purge(mock.Mock()) + self.assertFalse(res) + mock_purge_sbd_from_cluster.assert_not_called() + + @mock.patch('crmsh.sbd.SBDManager.restart_cluster_if_possible') + @mock.patch('crmsh.sbd.purge_sbd_from_cluster') + def test_do_purge(self, mock_purge_sbd_from_cluster, mock_restart_cluster_if_possible): + self.sbd_instance_diskbased.service_is_active = mock.Mock(return_value=True) + res = self.sbd_instance_diskbased.do_purge(mock.Mock()) + self.assertTrue(res) + mock_purge_sbd_from_cluster.assert_called_once() + mock_restart_cluster_if_possible.assert_called_once() + + @mock.patch('crmsh.xmlutil.CrmMonXmlParser') + def test_print_sbd_agent_status(self, mock_CrmMonXmlParser): + mock_CrmMonXmlParser.return_value.is_resource_configured.return_value = True + mock_CrmMonXmlParser.return_value.get_resource_id_list_via_type.return_value = ["sbd"] + self.sbd_instance_diskbased.cluster_shell.get_rc_output_without_input.return_value = 
(0, "active") + with mock.patch('sys.stdout', new_callable=io.StringIO) as mock_stdout: + self.sbd_instance_diskbased._print_sbd_agent_status() + self.assertEqual(mock_stdout.getvalue(), "# Status of fence_sbd:\nactive\n") + + @mock.patch('builtins.print') + def test_print_sbd_type_no_sbd(self, mock_print): + self.sbd_instance_diskbased.service_manager.service_is_active = mock.Mock(return_value=False) + self.sbd_instance_diskbased._print_sbd_type() + mock_print.assert_not_called() + + @mock.patch('builtins.print') + def test_print_sbd_type(self, mock_print): + self.sbd_instance_diskbased.service_manager.service_is_active = mock.Mock(return_value=True) + self.sbd_instance_diskbased._print_sbd_type() + mock_print.assert_has_calls([ + mock.call('# Type of SBD:'), + mock.call('Disk-based SBD configured'), + mock.call() + ]) + + @mock.patch('builtins.print') + def test_print_sbd_type_diskless(self, mock_print): + self.sbd_instance_diskless.service_manager.service_is_active = mock.Mock(return_value=True) + self.sbd_instance_diskless._print_sbd_type() + mock_print.assert_has_calls([ + mock.call('# Type of SBD:'), + mock.call('Diskless SBD configured'), + mock.call() + ]) + + @mock.patch('builtins.print') + def test_print_sbd_status(self, mock_print): + self.sbd_instance_diskbased.service_manager.service_is_active = mock.Mock(side_effect=[True, False]) + self.sbd_instance_diskbased.service_manager.service_is_enabled = mock.Mock(side_effect=[True, False]) + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error = mock.Mock(side_effect=["10min", "10sec"]) + self.sbd_instance_diskbased._print_sbd_status() + mock_print.assert_has_calls([ + mock.call('# Status of sbd.service:'), + mock.call('Node |Active |Enabled |Since'), + mock.call('node1 |YES |YES |active since: 10min'), + mock.call('node2 |NO |NO |disactive since: 10sec'), + mock.call() + ]) + + @mock.patch('builtins.print') + def test_print_watchdog_info_no_cluster_nodes(self, mock_print): + data_node1 = 
"""Discovered 1 watchdog devices: + + [1] /dev/watchdog0 + Identity: iTCO_wdt + Driver: iTCO_wdt + """ + data_node2 = """Discovered 1 watchdog devices: + + [1] /dev/watchdog0 + Identity: iTCO_wdt + Driver: iTCO_wdt + """ + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error.side_effect = [data_node1, data_node2] + self.sbd_instance_diskbased._print_watchdog_info() + mock_print.assert_has_calls([ + mock.call("# Watchdog info:"), + mock.call('Node |Device |Driver |Kernel Timeout'), + mock.call('node1 |N/A |N/A |N/A'), + mock.call('node2 |N/A |N/A |N/A'), + mock.call() + ]) + + @mock.patch('builtins.print') + def test_print_watchdog_info(self, mock_print): + data_node1 = """Discovered 1 watchdog devices: + +[1] /dev/watchdog0 +Identity: Busy: PID 3120 (sbd) +Driver: iTCO_wdt + """ + data_node2 = """Discovered 1 watchdog devices: + +[1] /dev/watchdog0 +Identity: Busy: PID 3120 (sbd) +Driver: iTCO_wdt + """ + self.sbd_instance_diskbased.cluster_shell.get_stdout_or_raise_error.side_effect = [data_node1, "10", data_node2, "10"] + self.sbd_instance_diskbased._print_watchdog_info() + + def test_do_status(self): + self.sbd_instance_diskbased._print_sbd_type = mock.Mock() + self.sbd_instance_diskbased._print_sbd_status = mock.Mock() + self.sbd_instance_diskbased._print_watchdog_info = mock.Mock() + self.sbd_instance_diskbased._print_sbd_agent_status = mock.Mock() + self.sbd_instance_diskbased._print_sbd_cgroup_status = mock.Mock() + mock_context = mock.Mock() + self.sbd_instance_diskbased.do_status(mock_context) diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py index ac24e08ce4..fd1237571c 100644 --- a/test/unittests/test_utils.py +++ b/test/unittests/test_utils.py @@ -934,7 +934,7 @@ def test_has_disk_mounted(mock_run): mock_run.assert_called_once_with("mount") -@mock.patch('crmsh.sbd.SBDManager.is_using_diskless_sbd') +@mock.patch('crmsh.sbd.SBDUtils.is_using_diskless_sbd') @mock.patch('crmsh.sh.ClusterShell.get_stdout_or_raise_error') 
def test_has_stonith_running(mock_run, mock_diskless): mock_run.return_value = """ @@ -1348,3 +1348,9 @@ def test_check_user_access_cluster(mock_user, mock_in, mock_sudo, mock_error): with pytest.raises(utils.TerminateSubCommand) as err: utils.check_user_access('cluster') mock_error.assert_called_once_with('Operation is denied. The current user lacks the necessary privilege.') + + +def test_is_subdict(): + d1 = {"a": 1, "b": 2} + d2 = {"a": 1} + assert utils.is_subdict(d2, d1) is True diff --git a/test/unittests/test_watchdog.py b/test/unittests/test_watchdog.py index 957f21ffb6..0c505a2e33 100644 --- a/test/unittests/test_watchdog.py +++ b/test/unittests/test_watchdog.py @@ -5,6 +5,7 @@ except ImportError: import mock +from crmsh import sbd from crmsh import watchdog from crmsh import bootstrap from crmsh import constants @@ -46,7 +47,7 @@ def test_watchdog_device_name(self): @mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') def test_verify_watchdog_device_ignore_error(self, mock_run): mock_run.return_value = (1, None, "error") - res = self.watchdog_inst._verify_watchdog_device("/dev/watchdog", True) + res = self.watchdog_inst.verify_watchdog_device("/dev/watchdog", True) self.assertEqual(res, False) mock_run.assert_called_once_with("wdctl /dev/watchdog") @@ -56,21 +57,21 @@ def test_verify_watchdog_device_error(self, mock_run, mock_error): mock_run.return_value = (1, None, "error") mock_error.side_effect = ValueError with self.assertRaises(ValueError) as err: - self.watchdog_inst._verify_watchdog_device("/dev/watchdog") + self.watchdog_inst.verify_watchdog_device("/dev/watchdog") mock_error.assert_called_once_with("Invalid watchdog device /dev/watchdog: error") mock_run.assert_called_once_with("wdctl /dev/watchdog") @mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') def test_verify_watchdog_device(self, mock_run): mock_run.return_value = (0, None, None) - res = self.watchdog_inst._verify_watchdog_device("/dev/watchdog") + res = 
self.watchdog_inst.verify_watchdog_device("/dev/watchdog") self.assertEqual(res, True) - @mock.patch('crmsh.watchdog.invoke') - def test_load_watchdog_driver(self, mock_invoke): + @mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') + def test_load_watchdog_driver(self, mock_run): self.watchdog_inst._load_watchdog_driver("softdog") - mock_invoke.assert_has_calls([ - mock.call("echo softdog > /etc/modules-load.d/watchdog.conf"), + mock_run.assert_has_calls([ + mock.call(f"echo softdog > {watchdog.Watchdog.WATCHDOG_CFG}"), mock.call("systemctl restart systemd-modules-load") ]) @@ -79,9 +80,9 @@ def test_get_watchdog_device_from_sbd_config(self, mock_parse): mock_parse_inst = mock.Mock() mock_parse.return_value = mock_parse_inst mock_parse_inst.get.return_value = "/dev/watchdog" - res = self.watchdog_inst._get_watchdog_device_from_sbd_config() + res = self.watchdog_inst.get_watchdog_device_from_sbd_config() self.assertEqual(res, "/dev/watchdog") - mock_parse.assert_called_once_with(bootstrap.SYSCONFIG_SBD) + mock_parse.assert_called_once_with(sbd.SBDManager.SYSCONFIG_SBD) @mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') def test_driver_is_loaded(self, mock_run): @@ -128,7 +129,7 @@ def test_set_watchdog_info(self, mock_run): self.watchdog_inst._set_watchdog_info() self.assertEqual(self.watchdog_inst._watchdog_info_dict, {'/dev/watchdog': 'softdog', '/dev/watchdog0': 'softdog', '/dev/watchdog1': 'iTCO_wdt'}) - @mock.patch('crmsh.watchdog.Watchdog._verify_watchdog_device') + @mock.patch('crmsh.watchdog.Watchdog.verify_watchdog_device') def test_get_device_through_driver_none(self, mock_verify): self.watchdog_inst._watchdog_info_dict = {'/dev/watchdog': 'softdog', '/dev/watchdog0': 'softdog', '/dev/watchdog1': 'iTCO_wdt'} mock_verify.return_value = False @@ -136,7 +137,7 @@ def test_get_device_through_driver_none(self, mock_verify): self.assertEqual(res, None) mock_verify.assert_called_once_with("/dev/watchdog1") - 
@mock.patch('crmsh.watchdog.Watchdog._verify_watchdog_device') + @mock.patch('crmsh.watchdog.Watchdog.verify_watchdog_device') def test_get_device_through_driver(self, mock_verify): self.watchdog_inst._watchdog_info_dict = {'/dev/watchdog': 'softdog', '/dev/watchdog0': 'softdog', '/dev/watchdog1': 'iTCO_wdt'} mock_verify.return_value = True @@ -187,7 +188,7 @@ def test_get_first_unused_device_none(self): res = self.watchdog_inst._get_first_unused_device() self.assertEqual(res, None) - @mock.patch('crmsh.watchdog.Watchdog._verify_watchdog_device') + @mock.patch('crmsh.watchdog.Watchdog.verify_watchdog_device') def test_get_first_unused_device(self, mock_verify): mock_verify.return_value = True self.watchdog_inst._watchdog_info_dict = {'/dev/watchdog': 'softdog', '/dev/watchdog0': 'softdog', '/dev/watchdog1': 'iTCO_wdt'} @@ -196,8 +197,8 @@ def test_get_first_unused_device(self, mock_verify): mock_verify.assert_called_once_with("/dev/watchdog", ignore_error=True) @mock.patch('crmsh.watchdog.Watchdog._get_first_unused_device') - @mock.patch('crmsh.watchdog.Watchdog._verify_watchdog_device') - @mock.patch('crmsh.watchdog.Watchdog._get_watchdog_device_from_sbd_config') + @mock.patch('crmsh.watchdog.Watchdog.verify_watchdog_device') + @mock.patch('crmsh.watchdog.Watchdog.get_watchdog_device_from_sbd_config') def test_set_input_from_config(self, mock_from_config, mock_verify, mock_first): mock_from_config.return_value = "/dev/watchdog" mock_verify.return_value = True @@ -206,8 +207,8 @@ def test_set_input_from_config(self, mock_from_config, mock_verify, mock_first): mock_from_config.assert_called_once_with() @mock.patch('crmsh.watchdog.Watchdog._get_first_unused_device') - @mock.patch('crmsh.watchdog.Watchdog._verify_watchdog_device') - @mock.patch('crmsh.watchdog.Watchdog._get_watchdog_device_from_sbd_config') + @mock.patch('crmsh.watchdog.Watchdog.verify_watchdog_device') + @mock.patch('crmsh.watchdog.Watchdog.get_watchdog_device_from_sbd_config') def 
test_set_input(self, mock_from_config, mock_verify, mock_first): mock_from_config.return_value = None mock_first.return_value = None @@ -221,7 +222,7 @@ def test_valid_device_false(self): res = self.watchdog_inst._valid_device("test") self.assertEqual(res, False) - @mock.patch('crmsh.watchdog.Watchdog._verify_watchdog_device') + @mock.patch('crmsh.watchdog.Watchdog.verify_watchdog_device') def test_valid_device(self, mock_verify): mock_verify.return_value = True self.watchdog_inst._watchdog_info_dict = {'/dev/watchdog': 'softdog', '/dev/watchdog0': 'softdog', '/dev/watchdog1': 'iTCO_wdt'} @@ -229,7 +230,7 @@ def test_valid_device(self, mock_verify): self.assertEqual(res, True) @mock.patch('crmsh.utils.fatal') - @mock.patch('crmsh.watchdog.Watchdog._get_watchdog_device_from_sbd_config') + @mock.patch('crmsh.watchdog.Watchdog.get_watchdog_device_from_sbd_config') @mock.patch('crmsh.watchdog.Watchdog._set_watchdog_info') def test_join_watchdog_error(self, mock_set_info, mock_from_config, mock_error): mock_from_config.return_value = None @@ -238,12 +239,12 @@ def test_join_watchdog_error(self, mock_set_info, mock_from_config, mock_error): self.watchdog_join_inst.join_watchdog() mock_set_info.assert_called_once_with() mock_from_config.assert_called_once_with() - mock_error.assert_called_once_with("Failed to get watchdog device from {}".format(bootstrap.SYSCONFIG_SBD)) + mock_error.assert_called_once_with("Failed to get watchdog device from {}".format(sbd.SBDManager.SYSCONFIG_SBD)) @mock.patch('crmsh.watchdog.Watchdog._load_watchdog_driver') @mock.patch('crmsh.watchdog.Watchdog._get_driver_through_device_remotely') @mock.patch('crmsh.watchdog.Watchdog._valid_device') - @mock.patch('crmsh.watchdog.Watchdog._get_watchdog_device_from_sbd_config') + @mock.patch('crmsh.watchdog.Watchdog.get_watchdog_device_from_sbd_config') @mock.patch('crmsh.watchdog.Watchdog._set_watchdog_info') def test_join_watchdog(self, mock_set_info, mock_from_config, mock_valid, 
mock_get_driver_remotely, mock_load): mock_from_config.return_value = "/dev/watchdog" @@ -258,25 +259,26 @@ def test_join_watchdog(self, mock_set_info, mock_from_config, mock_valid, mock_g mock_get_driver_remotely.assert_called_once_with("/dev/watchdog") mock_load.assert_called_once_with("softdog") - @mock.patch('crmsh.watchdog.invokerc') + @mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') @mock.patch('crmsh.watchdog.Watchdog._valid_device') @mock.patch('crmsh.watchdog.Watchdog._set_input') @mock.patch('crmsh.watchdog.Watchdog._set_watchdog_info') - def test_init_watchdog_valid(self, mock_set_info, mock_set_input, mock_valid, mock_invokerc): + def test_init_watchdog_valid(self, mock_set_info, mock_set_input, mock_valid, mock_run): mock_valid.return_value = True + mock_run.return_value = (0, None, None) self.watchdog_inst._input = "/dev/watchdog" self.watchdog_inst.init_watchdog() - mock_invokerc.assert_not_called() + mock_run.assert_not_called() mock_valid.assert_called_once_with("/dev/watchdog") @mock.patch('crmsh.utils.fatal') - @mock.patch('crmsh.watchdog.invokerc') + @mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') @mock.patch('crmsh.watchdog.Watchdog._valid_device') @mock.patch('crmsh.watchdog.Watchdog._set_input') @mock.patch('crmsh.watchdog.Watchdog._set_watchdog_info') - def test_init_watchdog_error(self, mock_set_info, mock_set_input, mock_valid, mock_invokerc, mock_error): + def test_init_watchdog_error(self, mock_set_info, mock_set_input, mock_valid, mock_run, mock_error): mock_valid.return_value = False - mock_invokerc.return_value = False + mock_run.return_value = (1, None, None) self.watchdog_inst._input = "test" mock_error.side_effect = SystemExit @@ -284,27 +286,27 @@ def test_init_watchdog_error(self, mock_set_info, mock_set_input, mock_valid, mo self.watchdog_inst.init_watchdog() mock_valid.assert_called_once_with("test") - mock_invokerc.assert_called_once_with("modinfo test") + mock_run.assert_called_once_with("modinfo test") 
mock_error.assert_called_once_with("Should provide valid watchdog device or driver name by -w option") @mock.patch('crmsh.watchdog.Watchdog._get_device_through_driver') @mock.patch('crmsh.watchdog.Watchdog._load_watchdog_driver') @mock.patch('crmsh.watchdog.Watchdog._driver_is_loaded') - @mock.patch('crmsh.watchdog.invokerc') + @mock.patch('crmsh.sh.ShellUtils.get_stdout_stderr') @mock.patch('crmsh.watchdog.Watchdog._valid_device') @mock.patch('crmsh.watchdog.Watchdog._set_input') @mock.patch('crmsh.watchdog.Watchdog._set_watchdog_info') - def test_init_watchdog(self, mock_set_info, mock_set_input, mock_valid, mock_invokerc, mock_is_loaded, mock_load, mock_get_device): + def test_init_watchdog(self, mock_set_info, mock_set_input, mock_valid, mock_run, mock_is_loaded, mock_load, mock_get_device): mock_valid.return_value = False self.watchdog_inst._input = "softdog" - mock_invokerc.return_value = True + mock_run.return_value = (0, None, None) mock_is_loaded.return_value = False mock_get_device.return_value = "/dev/watchdog" self.watchdog_inst.init_watchdog() mock_valid.assert_called_once_with("softdog") - mock_invokerc.assert_called_once_with("modinfo softdog") + mock_run.assert_called_once_with("modinfo softdog") mock_is_loaded.assert_called_once_with("softdog") mock_load.assert_called_once_with("softdog") mock_set_info.assert_has_calls([mock.call(), mock.call()])