Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'crm sbd' sub-level (jsc#PED-8256) #1491

Draft
wants to merge 17 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
99adff3
Dev: behave: Add sbd_ui.feature to test the crm sbd UI
liangxin1300 Jul 16, 2024
87a2a29
Dev: ui_sbd: Add new 'crm sbd' sublevel (jsc#PED-8256)
liangxin1300 Jun 14, 2024
ef7087c
Dev: behave: Adjust functional test for previous changes
liangxin1300 Aug 13, 2024
d7c9299
Dev: doc: Add help info for crm sbd sublevel
liangxin1300 Aug 19, 2024
0926c60
Dev: ui_sbd: Add property/sysconfig section header for sbd configure …
liangxin1300 Sep 11, 2024
ef30a96
Dev: ui_sbd: No need to consider static case when calling crm configu…
liangxin1300 Sep 11, 2024
15f8384
Dev: doc: Update crm.8.adoc for SBD help text
liangxin1300 Sep 11, 2024
83dabf9
Dev: ui_sbd: Catch both stderr and stdout for crm resource status
liangxin1300 Sep 11, 2024
4c63398
Dev: ui_sbd: Update regex for parsing SBD device by partlabel
liangxin1300 Sep 11, 2024
1b65864
Dev: ui_sbd: Clean up existing fence_sbd resource before configure di…
liangxin1300 Sep 11, 2024
52de833
Dev: ui_sbd: Minor changes to the code
liangxin1300 Sep 11, 2024
28f418e
Dev: bootstrap: Check if sbd package is installed in the right place
liangxin1300 Sep 13, 2024
221eaa6
Dev: ui_sbd: Refactor do_status method
liangxin1300 Sep 13, 2024
b187d8f
Dev: Refactor the code to avoid circular import
liangxin1300 Sep 19, 2024
4e8e361
Dev: report: Dump output of 'crm sbd configure show' and 'crm sbd sta…
liangxin1300 Sep 20, 2024
c58b6b8
Dev: ui_sbd: No need to specify device="" when trying to modify prope…
liangxin1300 Sep 20, 2024
8cd7705
Dev: unittests: Adjust unit test for previous commits
liangxin1300 Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 34 additions & 23 deletions crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
from .sh import ShellUtils
from .ui_node import NodeMgmt
from .user_of_host import UserOfHost, UserNotFoundError
from .sbd import SBDUtils, SBDManager, SBDTimeout
from . import watchdog
import crmsh.healthcheck


Expand All @@ -56,21 +58,18 @@
COROSYNC_AUTH = "/etc/corosync/authkey"
CRM_CFG = "/etc/crm/crm.conf"
PROFILES_FILE = "/etc/crm/profiles.yml"
SYSCONFIG_SBD = "/etc/sysconfig/sbd"
SYSCONFIG_PCMK = "/etc/sysconfig/pacemaker"
SYSCONFIG_NFS = "/etc/sysconfig/nfs"
PCMK_REMOTE_AUTH = "/etc/pacemaker/authkey"
COROSYNC_CONF_ORIG = tmpfiles.create()[1]
SERVICES_STOP_LIST = ["corosync-qdevice.service", "corosync.service", "hawk.service", CSYNC2_SERVICE]
WATCHDOG_CFG = "/etc/modules-load.d/watchdog.conf"
BOOTH_DIR = "/etc/booth"
BOOTH_CFG = "/etc/booth/booth.conf"
BOOTH_AUTH = "/etc/booth/authkey"
SBD_SYSTEMD_DELAY_START_DIR = "/etc/systemd/system/sbd.service.d"
FILES_TO_SYNC = (BOOTH_DIR, corosync.conf(), COROSYNC_AUTH, CSYNC2_CFG, CSYNC2_KEY, "/etc/ctdb/nodes",
"/etc/drbd.conf", "/etc/drbd.d", "/etc/ha.d/ldirectord.cf", "/etc/lvm/lvm.conf", "/etc/multipath.conf",
"/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SYSCONFIG_SBD, PCMK_REMOTE_AUTH, WATCHDOG_CFG,
PROFILES_FILE, CRM_CFG, SBD_SYSTEMD_DELAY_START_DIR)
"/etc/samba/smb.conf", SYSCONFIG_NFS, SYSCONFIG_PCMK, SBDManager.SYSCONFIG_SBD, PCMK_REMOTE_AUTH, watchdog.Watchdog.WATCHDOG_CFG,
PROFILES_FILE, CRM_CFG, SBDManager.SBD_SYSTEMD_DELAY_START_DIR)

INIT_STAGES_EXTERNAL = ("ssh", "csync2", "corosync", "sbd", "cluster", "ocfs2", "admin", "qdevice")
INIT_STAGES_INTERNAL = ("csync2_remote", "qnetd_remote", "remote_auth")
Expand Down Expand Up @@ -136,7 +135,7 @@
self.profiles_dict = {}
self.default_nic = None
self.default_ip_list = []
self.rm_list = [SYSCONFIG_SBD, CSYNC2_CFG, corosync.conf(), CSYNC2_KEY,
self.rm_list = [SBDManager.SYSCONFIG_SBD, CSYNC2_CFG, corosync.conf(), CSYNC2_KEY,
COROSYNC_AUTH, "/var/lib/heartbeat/crm/*", "/var/lib/pacemaker/cib/*",
"/var/lib/corosync/*", "/var/lib/pacemaker/pengine/*", PCMK_REMOTE_AUTH,
"/var/lib/csync2/*", "~/.config/crm/*"]
Expand Down Expand Up @@ -215,12 +214,21 @@
"""
Validate sbd options
"""
with_sbd_option = self.sbd_devices or self.diskless_sbd
sbd_installed = utils.package_is_installed("sbd")

if with_sbd_option and not sbd_installed:
utils.fatal(SBDManager.SBD_NOT_INSTALLED_MSG)

Check warning on line 221 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L221

Added line #L221 was not covered by tests
if self.sbd_devices and self.diskless_sbd:
utils.fatal("Can't use -s and -S options together")
if self.sbd_devices:
SBDUtils.verify_sbd_device(self.sbd_devices)

Check warning on line 225 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L225

Added line #L225 was not covered by tests
if self.stage == "sbd":
if not self.sbd_devices and not self.diskless_sbd and self.yes_to_all:
if not sbd_installed:
utils.fatal(SBDManager.SBD_NOT_INSTALLED_MSG)

Check warning on line 228 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L228

Added line #L228 was not covered by tests
if not with_sbd_option and self.yes_to_all:
utils.fatal("Stage sbd should specify sbd device by -s or diskless sbd by -S option")
if ServiceManager().service_is_active("sbd.service") and not config.core.force:
if ServiceManager().service_is_active(constants.SBD_SERVICE) and not config.core.force:
utils.fatal("Can't configure stage sbd: sbd.service already running! Please use crm option '-F' if need to redeploy")
if self.cluster_is_running:
utils.check_all_nodes_reachable()
Expand Down Expand Up @@ -297,8 +305,7 @@
self._validate_sbd_option()

def init_sbd_manager(self):
from .sbd import SBDManager
self.sbd_manager = SBDManager(self)
self.sbd_manager = SBDManager(bootstrap_context=self)

def detect_platform(self):
"""
Expand Down Expand Up @@ -400,7 +407,7 @@


def confirm(msg):
if _context.yes_to_all:
if config.core.force or (_context and _context.yes_to_all):
return True
disable_completion()
rc = logger_utils.confirm(msg)
Expand All @@ -410,12 +417,12 @@


def disable_completion():
if _context.ui_context:
if _context and _context.ui_context:

Check warning on line 420 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L420

Added line #L420 was not covered by tests
_context.ui_context.disable_completion()


def enable_completion():
if _context.ui_context:
if _context and _context.ui_context:

Check warning on line 425 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L425

Added line #L425 was not covered by tests
_context.ui_context.setup_readline()


Expand Down Expand Up @@ -500,7 +507,7 @@
return False

# if peer_node is None, this is in the init process
if _context.cluster_node is None:
if not _context or _context.cluster_node is None:
return True
# In join process
# If the joining node is already online but can't find the init node
Expand Down Expand Up @@ -769,11 +776,10 @@

Return success node list
"""
from .sbd import SBDTimeout
# not _context means not in init or join process
if not _context and \
utils.package_is_installed("sbd") and \
ServiceManager().service_is_enabled("sbd.service") and \
ServiceManager().service_is_enabled(constants.SBD_SERVICE) and \
SBDTimeout.is_sbd_delay_start():
target_dir = "/run/systemd/system/sbd.service.d/"
cmd1 = "mkdir -p {}".format(target_dir)
Expand Down Expand Up @@ -1400,7 +1406,7 @@
import crmsh.sbd
if _context.stage == "sbd":
crmsh.sbd.clean_up_existing_sbd_resource()
_context.sbd_manager.sbd_init()
_context.sbd_manager.init_and_deploy_sbd()


def init_upgradeutil():
Expand Down Expand Up @@ -1437,7 +1443,9 @@
rsc_defaults rsc-options: resource-stickiness=1 migration-threshold=3
""")

_context.sbd_manager.configure_sbd_resource_and_properties()
if ServiceManager().service_is_enabled(constants.SBD_SERVICE):
_context.sbd_manager.configure_sbd()

Check warning on line 1447 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L1447

Added line #L1447 was not covered by tests



def init_admin():
Expand Down Expand Up @@ -2089,8 +2097,7 @@
shell.get_stdout_or_raise_error("rm -f {}".format(' '.join(_context.rm_list)), remote)
# restore original sbd configuration file from /usr/share/fillup-templates/sysconfig.sbd
if utils.package_is_installed("sbd", remote_addr=remote):
from .sbd import SBDManager
cmd = "cp {} {}".format(SBDManager.SYSCONFIG_SBD_TEMPLATE, SYSCONFIG_SBD)
cmd = "cp {} {}".format(SBDManager.SYSCONFIG_SBD_TEMPLATE, SBDManager.SYSCONFIG_SBD)
shell.get_stdout_or_raise_error(cmd, remote)


Expand Down Expand Up @@ -2737,8 +2744,7 @@
"""
Adjust stonith-timeout for sbd and other scenarios
"""
if ServiceManager().service_is_active("sbd.service"):
from .sbd import SBDTimeout
if ServiceManager().service_is_active(constants.SBD_SERVICE):
SBDTimeout.adjust_sbd_timeout_related_cluster_configuration()
else:
value = get_stonith_timeout_generally_expected()
Expand Down Expand Up @@ -2801,7 +2807,12 @@
"""
Sync files between cluster nodes
"""
if _context.skip_csync2:
if _context:
skip_csync2 = _context.skip_csync2
else:
skip_csync2 = not ServiceManager().service_is_active(CSYNC2_SERVICE)

Check warning on line 2813 in crmsh/bootstrap.py

View check run for this annotation

Codecov / codecov/patch

crmsh/bootstrap.py#L2813

Added line #L2813 was not covered by tests

if skip_csync2:
utils.cluster_copy_file(path, nodes=_context.node_list_in_cluster, output=False)
else:
csync2_update(path)
Expand Down
5 changes: 5 additions & 0 deletions crmsh/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,4 +450,9 @@
HIDDEN_COMMANDS = {'ms'}

NO_SSH_ERROR_MSG = "ssh-related operations are disabled. crmsh works in local mode."

PCMK_SERVICE = "pacemaker.service"
SBD_SERVICE = "sbd.service"

SHOW_SBD_START_TIMEOUT_CMD = "systemctl show -p TimeoutStartUSec sbd.service --value"
# vim:ts=4:sw=4:et:
2 changes: 1 addition & 1 deletion crmsh/ocfs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def _check_sbd_and_ocfs2_dev(self):
"""
from . import sbd
if ServiceManager().service_is_enabled("sbd.service"):
sbd_device_list = sbd.SBDManager.get_sbd_device_from_config()
sbd_device_list = sbd.SBDUtils.get_sbd_device_from_config()
for dev in self.ocfs2_devices:
if dev in sbd_device_list:
self._dynamic_raise_error("{} cannot be the same with SBD device".format(dev))
Expand Down
8 changes: 4 additions & 4 deletions crmsh/qdevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from . import lock
from . import log
from .service_manager import ServiceManager
from .sbd import SBDManager, SBDTimeout, SBDUtils


logger = log.setup_logger(__name__)
Expand Down Expand Up @@ -614,15 +615,14 @@ def adjust_sbd_watchdog_timeout_with_qdevice(self):
"""
Adjust SBD_WATCHDOG_TIMEOUT when configuring qdevice and diskless SBD
"""
from .sbd import SBDManager, SBDTimeout
utils.check_all_nodes_reachable()
self.using_diskless_sbd = SBDManager.is_using_diskless_sbd()
self.using_diskless_sbd = SBDUtils.is_using_diskless_sbd()
# add qdevice after diskless sbd started
if self.using_diskless_sbd:
res = SBDManager.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT")
res = SBDUtils.get_sbd_value_from_config("SBD_WATCHDOG_TIMEOUT")
if not res or int(res) < SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE:
sbd_watchdog_timeout_qdevice = SBDTimeout.SBD_WATCHDOG_TIMEOUT_DEFAULT_WITH_QDEVICE
SBDManager.update_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)})
SBDManager.update_sbd_configuration({"SBD_WATCHDOG_TIMEOUT": str(sbd_watchdog_timeout_qdevice)})
utils.set_property("stonith-timeout", SBDTimeout.get_stonith_timeout())

@qnetd_lock_for_same_cluster_name
Expand Down
13 changes: 9 additions & 4 deletions crmsh/report/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,11 +447,16 @@ def collect_sbd_info(context: core.Context) -> None:
return

sbd_f = os.path.join(context.work_dir, constants.SBD_F)
cmd = ". {};export SBD_DEVICE;{};{}".format(constants.SBDCONF, "sbd dump", "sbd list")
cmd_list = [
f". {constants.SBDCONF};export SBD_DEVICE;sbd dump;sbd list",
"crm sbd configure show",
"crm sbd status"
]
with open(sbd_f, "w") as f:
f.write("\n\n#=====[ Command ] ==========================#\n")
f.write(f"# {cmd}\n")
f.write(utils.get_cmd_output(cmd))
for cmd in cmd_list:
f.write("\n\n#=====[ Command ] ==========================#\n")
f.write(f"# {cmd}\n")
f.write(utils.get_cmd_output(cmd))

logger.debug(f"Dump SBD config file into {utils.real_path(sbd_f)}")

Expand Down
2 changes: 1 addition & 1 deletion crmsh/report/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ def get_cmd_output(cmd: str, timeout: int = None) -> str:
out_str += f"{out}\n"
if err:
out_str += f"{err}\n"
return out_str
return crmutils.strip_ansi_escape_sequences(out_str)


def get_timespan_str(context: core.Context) -> str:
Expand Down
Loading