From 3be5b668a1b4497d33b38f3a45882938ec0b25dd Mon Sep 17 00:00:00 2001 From: themylogin Date: Tue, 17 Sep 2024 12:46:51 +0200 Subject: [PATCH] `system.reboot.info` --- .../middlewared/api/v25_04_0/__init__.py | 1 + .../api/v25_04_0/failover_reboot.py | 25 ++- .../middlewared/api/v25_04_0/system_reboot.py | 18 ++ .../plugins/failover_/disabled_reasons.py | 4 +- .../middlewared/plugins/failover_/reboot.py | 157 ++++++++++-------- .../middlewared/plugins/security/update.py | 41 ++--- .../middlewared/plugins/system/reboot.py | 76 +++++++++ .../middlewared/scripts/configure_fips.py | 31 +--- tests/api2/test_zz.py | 15 -- 9 files changed, 210 insertions(+), 158 deletions(-) create mode 100644 src/middlewared/middlewared/api/v25_04_0/system_reboot.py create mode 100644 src/middlewared/middlewared/plugins/system/reboot.py delete mode 100644 tests/api2/test_zz.py diff --git a/src/middlewared/middlewared/api/v25_04_0/__init__.py b/src/middlewared/middlewared/api/v25_04_0/__init__.py index ba64061a9008..058f1cb9398b 100644 --- a/src/middlewared/middlewared/api/v25_04_0/__init__.py +++ b/src/middlewared/middlewared/api/v25_04_0/__init__.py @@ -6,5 +6,6 @@ from .failover_reboot import * # noqa from .group import * # noqa from .privilege import * # noqa +from .system_reboot import * # noqa from .user import * # noqa from .vendor import * # noqa diff --git a/src/middlewared/middlewared/api/v25_04_0/failover_reboot.py b/src/middlewared/middlewared/api/v25_04_0/failover_reboot.py index 7dfd3d74299b..7146ef631539 100644 --- a/src/middlewared/middlewared/api/v25_04_0/failover_reboot.py +++ b/src/middlewared/middlewared/api/v25_04_0/failover_reboot.py @@ -3,26 +3,25 @@ # Licensed under the terms of the TrueNAS Enterprise License Agreement # See the file LICENSE.IX for complete terms and conditions from middlewared.api.base import BaseModel, single_argument_result +from .system_reboot import SystemRebootInfoResult -__all__ = ["FailoverRebootRequiredArgs", "FailoverRebootRequiredResult"] +__all__ = ["FailoverRebootInfoArgs", "FailoverRebootInfoResult", + "FailoverRebootOtherNodeArgs", "FailoverRebootOtherNodeResult"] -class FailoverRebootRequiredArgs(BaseModel): +class FailoverRebootInfoArgs(BaseModel): pass -class FailoverRebootRequiredResultThisNode(BaseModel): - id: str - reboot_required: bool - reboot_required_reasons: list[str] +@single_argument_result +class FailoverRebootInfoResult(BaseModel): + this_node: SystemRebootInfoResult + other_node: SystemRebootInfoResult | None -class FailoverRebootRequiredResultOtherNode(FailoverRebootRequiredResultThisNode): - id: str | None - reboot_required: bool | None +class FailoverRebootOtherNodeArgs(BaseModel): + pass -@single_argument_result -class FailoverRebootRequiredResult(BaseModel): - this_node: FailoverRebootRequiredResultThisNode - other_node: FailoverRebootRequiredResultOtherNode +class FailoverRebootOtherNodeResult(BaseModel): + result: None diff --git a/src/middlewared/middlewared/api/v25_04_0/system_reboot.py b/src/middlewared/middlewared/api/v25_04_0/system_reboot.py new file mode 100644 index 000000000000..1904258cf36e --- /dev/null +++ b/src/middlewared/middlewared/api/v25_04_0/system_reboot.py @@ -0,0 +1,18 @@ +from middlewared.api.base import BaseModel, single_argument_result + +__all__ = ["SystemRebootInfoArgs", "SystemRebootInfoResult"] + + +class SystemRebootInfoArgs(BaseModel): + pass + + +class RebootRequiredReason(BaseModel): + code: str + reason: str + + +@single_argument_result +class SystemRebootInfoResult(BaseModel): + boot_id: str + reboot_required_reasons: list[RebootRequiredReason] diff --git a/src/middlewared/middlewared/plugins/failover_/disabled_reasons.py b/src/middlewared/middlewared/plugins/failover_/disabled_reasons.py index 15be5860c01d..5e3b54e6ba8b 100644 --- a/src/middlewared/middlewared/plugins/failover_/disabled_reasons.py +++ b/src/middlewared/middlewared/plugins/failover_/disabled_reasons.py @@ -91,9 +91,9 @@ def get_local_reasons(self, app, ifaces, reasons): return reboot_info = self.middleware.call_sync('failover.reboot.info') - if reboot_info['this_node']['reboot_required']: + if reboot_info['this_node']['reboot_required_reasons']: reasons.add(DisabledReasonsEnum.LOC_FIPS_REBOOT_REQ.name) - if reboot_info['other_node']['reboot_required']: + if reboot_info['other_node']['reboot_required_reasons']: reasons.add(DisabledReasonsEnum.REM_FIPS_REBOOT_REQ.name) if self.SYSTEM_DATASET_SETUP_IN_PROGRESS: diff --git a/src/middlewared/middlewared/plugins/failover_/reboot.py b/src/middlewared/middlewared/plugins/failover_/reboot.py index 9397d69f3181..fe562de3b01f 100644 --- a/src/middlewared/middlewared/plugins/failover_/reboot.py +++ b/src/middlewared/middlewared/plugins/failover_/reboot.py @@ -3,15 +3,23 @@ # Licensed under the terms of the TrueNAS Enterprise License Agreement # See the file LICENSE.IX for complete terms and conditions import asyncio +from dataclasses import dataclass import errno import time from middlewared.api import api_method -from middlewared.api.current import FailoverRebootRequiredArgs, FailoverRebootRequiredResult -from middlewared.schema import accepts, Bool, Dict, UUID, returns +from middlewared.api.current import ( + FailoverRebootInfoArgs, FailoverRebootInfoResult, + FailoverRebootOtherNodeArgs, FailoverRebootOtherNodeResult, +) from middlewared.service import CallError, job, private, Service -FIPS_KEY = 'fips_toggled' + +@dataclass +class RemoteRebootReason: + # Boot ID for which the reboot is required. `None` means that the system must be rebooted when it comes online. + boot_id: str | None + reason: str class FailoverRebootService(Service): @@ -20,83 +28,74 @@ class Config: cli_namespace = 'system.failover.reboot' namespace = 'failover.reboot' - reboot_reasons : dict[str, str] = {} + remote_reboot_reasons: dict[str, RemoteRebootReason] = {} @private - async def add_reason(self, key: str, value: str): + async def add_remote_reason(self, code: str, reason: str): """ - Adds a reason on why this system needs a reboot. - :param key: unique identifier for the reason. - :param value: text explanation for the reason. + Adds a reason for why the remote system needs a reboot. + This will be appended to the list of the reasons that the remote node itself returns. + :param code: unique identifier for the reason. + :param reason: text explanation for the reason. """ - self.reboot_reasons[key] = value + try: + boot_id = await self.middleware.call('failover.call_remote', 'system.boot_id', [], { + 'raise_connect_error': False, + 'timeout': 2, + 'connect_timeout': 2, + }) + except Exception: + self.logger.warning('Unexpected error querying remote reboot boot id', exc_info=True) + # Remote system is inaccessible, so, when it comes back, another reboot will be required. + boot_id = None + + self.remote_reboot_reasons[code] = RemoteRebootReason(boot_id, reason) + + await self.send_event() + + @api_method(FailoverRebootInfoArgs, FailoverRebootInfoResult, roles=['FAILOVER_READ']) + async def info(self): + changed = False - @private - async def boot_ids(self): - info = { - 'this_node': {'id': await self.middleware.call('system.boot_id')}, - 'other_node': {'id': None} - } try: - info['other_node']['id'] = await self.middleware.call( - 'failover.call_remote', 'system.boot_id', [], { - 'raise_connect_error': False, 'timeout': 2, 'connect_timeout': 2, - } - ) + other_node = await self.middleware.call('failover.call_remote', 'system.reboot.info', [], { + 'raise_connect_error': False, + 'timeout': 2, + 'connect_timeout': 2, + }) except Exception: - self.logger.warning('Unexpected error querying remote node boot id', exc_info=True) + self.logger.warning('Unexpected error querying remote reboot info', exc_info=True) + other_node = None + + if other_node is not None: + for remote_reboot_reason_code, remote_reboot_reason in list(self.remote_reboot_reasons.items()): + if remote_reboot_reason.boot_id is None: + # This reboot reason was added while the remote node was not functional. + # In that case, when the remote system comes online, an additional reboot is required. + remote_reboot_reason.boot_id = other_node['boot_id'] + changed = True + + if remote_reboot_reason.boot_id == other_node['boot_id']: + other_node['reboot_required_reasons'].append({ + 'code': remote_reboot_reason_code, + 'reason': remote_reboot_reason.reason, + }) + else: + # The system was rebooted, this reason is not valid anymore + self.remote_reboot_reasons.pop(remote_reboot_reason_code) + changed = True + + info = { + 'this_node': await self.middleware.call('system.reboot.info'), + 'other_node': other_node, + } + + if changed: + await self.send_event(info) return info - @private - async def info_impl(self): - # initial state - current_info = await self.boot_ids() - current_info['this_node'].update({'reboot_required': None, 'reboot_required_reasons': []}) - current_info['other_node'].update({'reboot_required': None, 'reboot_required_reasons': []}) - - fips_change_info = await self.middleware.call('keyvalue.get', FIPS_KEY, False) - if not fips_change_info: - for i in current_info: - current_info[i]['reboot_required'] = False - else: - for key in ('this_node', 'other_node'): - current_info[key]['reboot_required'] = all(( - fips_change_info[key]['id'] == current_info[key]['id'], - fips_change_info[key]['reboot_required'] - )) - if current_info[key]['reboot_required']: - current_info[key]['reboot_required_reasons'].append('FIPS configuration was changed.') - - if all(( - current_info['this_node']['reboot_required'] is False, - current_info['other_node']['reboot_required'] is False, - )): - # no reboot required for either controller so delete - await self.middleware.call('keyvalue.delete', FIPS_KEY) - - for reason in self.reboot_reasons.values(): - current_info['this_node']['reboot_required'] = True - current_info['this_node']['reboot_required_reasons'].append(reason) - - return current_info - - @api_method(FailoverRebootRequiredArgs, FailoverRebootRequiredResult, roles=['FAILOVER_READ']) - async def info(self): - """Returns the local and remote nodes boot_ids along with their - reboot statuses (i.e. does a reboot need to take place)""" - return await self.info_impl() - - @accepts(roles=['FAILOVER_READ']) - @returns(Bool()) - async def required(self): - """Returns whether this node needs to be rebooted for failover/security - system configuration changes to take effect.""" - # TODO: should we raise Callerror/ValidationError if reboot_required is None? - return (await self.info())['this_node']['reboot_required'] is True - - @accepts(roles=['FULL_ADMIN']) - @returns() + @api_method(FailoverRebootOtherNodeArgs, FailoverRebootOtherNodeResult, roles=['FULL_ADMIN']) @job(lock='reboot_standby') async def other_node(self, job): """ @@ -145,3 +144,21 @@ async def other_node(self, job): job.set_progress(100, 'Other controller rebooted successfully') return True + + @private + async def send_event(self, info=None): + if info is None: + info = await self.info() + + self.middleware.send_event('failover.reboot.info', 'CHANGED', id=None, fields=info) + + +async def remote_reboot_info(middleware, *args, **kwargs): + await middleware.call('failover.reboot.send_event') + + +async def setup(middleware): + middleware.event_register('failover.reboot.info', 'Sent when a system reboot is required.', roles=['FAILOVER_READ']) + + await middleware.call('failover.remote_on_connect', remote_reboot_info) + await middleware.call('failover.remote_subscribe', 'system.reboot.info', remote_reboot_info) diff --git a/src/middlewared/middlewared/plugins/security/update.py b/src/middlewared/middlewared/plugins/security/update.py index db7398808d97..6ae99af81717 100644 --- a/src/middlewared/middlewared/plugins/security/update.py +++ b/src/middlewared/middlewared/plugins/security/update.py @@ -1,10 +1,12 @@ import middlewared.sqlalchemy as sa from middlewared.plugins.failover_.disabled_reasons import DisabledReasonsEnum -from middlewared.plugins.failover_.reboot import FIPS_KEY from middlewared.schema import accepts, Bool, Dict, Int, Patch from middlewared.service import ConfigService, ValidationError, job, private +FIPS_REBOOT_CODE = 'FIPS' +FIPS_REBOOT_REASON = 'FIPS configuration was changed.' + class SystemSecurityModel(sa.Model): __tablename__ = 'system_security' @@ -32,46 +34,23 @@ async def configure_fips_on_ha(self, is_ha, job): return await self.middleware.call('failover.call_remote', 'etc.generate', ['fips']) - boot_info = await self.middleware.call('failover.reboot.info') - if boot_info['this_node']['reboot_required']: - # means FIPS is being toggled but this node is already pending a reboot - # so it means the user toggled FIPS twice without a reboot in between - boot_info['this_node']['reboot_required'] = False - await self.middleware.call('keyvalue.set', FIPS_KEY, boot_info) - else: - # means FIPS is toggled and this node isn't pending a reboot, so mark it - # as such - boot_info['this_node']['reboot_required'] = True - await self.middleware.call('keyvalue.set', FIPS_KEY, boot_info) - if boot_info['other_node']['reboot_required']: - # means FIPS is being toggled but other node is already pending a reboot + remote_reboot_reasons = await self.middleware.call('failover.call_remote', 'system.reboot.list_reasons') + if FIPS_REBOOT_CODE in remote_reboot_reasons: + # means FIPS is being toggled but other node is already pending a reboot, # so it means the user toggled FIPS twice and somehow the other node # didn't reboot (even though we do this automatically). This is an edge # case and means someone or something is doing things behind our backs - boot_info['other_node']['reboot_required'] = False - await self.middleware.call('keyvalue.set', FIPS_KEY, boot_info) + await self.middleware.call('failover.call_remote', 'system.reboot.remove_reason', [FIPS_REBOOT_CODE]) else: try: # we automatically reboot (and wait for) the other controller reboot_job = await self.middleware.call('failover.reboot.other_node') await job.wrap(reboot_job) except Exception: - self.logger.error('Unexpected failure rebooting the other node', exc_info=True) - # something extravagant happened so we'll just play it safe and say that + # something extravagant happened, so we'll just play it safe and say that # another reboot is required - boot_info['other_node']['reboot_required'] = True - else: - new_info = await self.middleware.call('failover.reboot.info') - if boot_info['other_node']['id'] == new_info['other_node']['id']: - # standby "rebooted" but the boot id is the same....not good - self.logger.warning('Other node claims it rebooted but boot id is the same') - boot_info['other_node']['reboot_required'] = True - else: - boot_info['other_node']['id'] = new_info['other_node']['id'] - boot_info['other_node']['reboot_required'] = False - - await self.middleware.call('keyvalue.set', FIPS_KEY, boot_info) + await self.middleware.call('failover.reboot.add_remote_reason', FIPS_REBOOT_CODE, FIPS_REBOOT_REASON) @private async def validate(self, is_ha, ha_disabled_reasons): @@ -125,7 +104,7 @@ async def do_update(self, job, data): # TODO: We likely need to do some SSH magic as well # let's investigate the exact configuration there await self.middleware.call('etc.generate', 'fips') - # TODO: We need to fix this for non-HA iX hardware... + await self.middleware.call('system.reboot.toggle_reason', FIPS_REBOOT_CODE, FIPS_REBOOT_REASON) await self.configure_fips_on_ha(is_ha, job) return await self.config() diff --git a/src/middlewared/middlewared/plugins/system/reboot.py b/src/middlewared/middlewared/plugins/system/reboot.py new file mode 100644 index 000000000000..5328313a5aa3 --- /dev/null +++ b/src/middlewared/middlewared/plugins/system/reboot.py @@ -0,0 +1,76 @@ +from middlewared.api import api_method +from middlewared.api.current import SystemRebootInfoArgs, SystemRebootInfoResult +from middlewared.service import private, Service + + +class SystemRebootService(Service): + + class Config: + cli_namespace = 'system.reboot_required' + namespace = 'system.reboot' + + reboot_reasons : dict[str, str] = {} + + @api_method(SystemRebootInfoArgs, SystemRebootInfoResult, roles=['SYSTEM_GENERAL_READ']) + async def info(self): + return { + 'boot_id': await self.middleware.call('system.boot_id'), + 'reboot_required_reasons': [ + { + 'code': code, + 'reason': reason, + } + for code, reason in self.reboot_reasons.items() + ], + } + + @private + async def add_reason(self, code: str, reason: str): + """ + Adds a reason for why this system needs a reboot. + :param code: unique identifier for the reason. + :param reason: text explanation for the reason. + """ + self.reboot_reasons[code] = reason + + await self._send_event() + + @private + async def toggle_reason(self, code: str, reason: str): + """ + Adds a reason for why this system needs a reboot if it does not exist, removes it otherwise. + :param code: unique identifier for the reason. + :param reason: text explanation for the reason. + """ + if code in self.reboot_reasons: + self.reboot_reasons.pop(code) + else: + self.reboot_reasons[code] = reason + + await self._send_event() + + @private + async def list_reasons(self): + """ + List reasons code for why this system needs a reboot. + :return: a list of reason codes + """ + return list(self.reboot_reasons.keys()) + + @private + async def remove_reason(self, code: str): + """ + Removes a reason for why this system needs a reboot. + :param code: unique identifier for the reason that was used to add it. + """ + self.reboot_reasons.pop(code, None) + + await self._send_event() + + async def _send_event(self): + self.middleware.send_event('system.reboot.info', 'CHANGED', id=None, fields=await self.info()) + + +async def setup(middleware): + middleware.event_register('system.reboot.info', 'Sent when a system reboot is required.', + roles=['SYSTEM_GENERAL_READ']) diff --git a/src/middlewared/middlewared/scripts/configure_fips.py b/src/middlewared/middlewared/scripts/configure_fips.py index f1327baeb381..821ef4fbd2ae 100755 --- a/src/middlewared/middlewared/scripts/configure_fips.py +++ b/src/middlewared/middlewared/scripts/configure_fips.py @@ -3,9 +3,9 @@ import shutil import sqlite3 import subprocess -import typing from middlewared.utils.db import query_config_table +from middlewared.utils.rootfs import ReadonlyRootfsManager FIPS_MODULE_FILE = '/usr/lib/ssl/fipsmodule.cnf' @@ -38,29 +38,6 @@ def configure_fips(enable_fips: bool) -> None: modify_openssl_config(enable_fips) -def get_active_be() -> typing.Optional[str]: - cp = subprocess.run(['zfs', 'get', '-o', 'name', '-H', 'name', '/'], capture_output=True, check=False) - if cp.returncode or not (active_be := cp.stdout.decode().strip()): - return None - - return active_be - - -def set_readonly(readonly: bool) -> None: - active_be = get_active_be() - if not active_be or subprocess.run( - ['zfs', 'get', '-H', 'truenas:developer', active_be], capture_output=True, check=False - ).stdout.decode().split()[-2] == 'on': - # We do not want to do anything here if developer mode is enabled or if we are not able to find active be - # because we are in chroot env in that case - return - - subprocess.run( - ['zfs', 'set', f'readonly={"on" if readonly else "off"}', os.path.join(active_be, 'usr')], - capture_output=True, check=False - ) - - def main() -> None: validate_system_state() try: @@ -71,9 +48,9 @@ def main() -> None: # into the system security_settings = {'enable_fips': False} - set_readonly(False) - configure_fips(security_settings['enable_fips']) - set_readonly(True) + with ReadonlyRootfsManager('/') as readonly_rootfs: + readonly_rootfs.make_writeable() + configure_fips(security_settings['enable_fips']) if __name__ == '__main__': diff --git a/tests/api2/test_zz.py b/tests/api2/test_zz.py deleted file mode 100644 index 99b07b5cbe4f..000000000000 --- a/tests/api2/test_zz.py +++ /dev/null @@ -1,15 +0,0 @@ -# These tests need to be executing at the very last stage of the testing process as they semi-break the system -# in different ways. -from middlewared.test.integration.utils import call - - -def test_failover_reboot_add_reason(): - info = call("failover.reboot.info") - assert not info["this_node"]["reboot_required"] - assert info["this_node"]["reboot_required_reasons"] == [] - - call("failover.reboot.add_reason", "test", "Test reason.") - - info = call("failover.reboot.info") - assert info["this_node"]["reboot_required"] - assert info["this_node"]["reboot_required_reasons"] == ["Test reason."]