diff --git a/doc/news/DM-47086.feature.rst b/doc/news/DM-47086.feature.rst new file mode 100644 index 000000000..5c08539d2 --- /dev/null +++ b/doc/news/DM-47086.feature.rst @@ -0,0 +1,27 @@ +Add option to mute watcher alarms when setting CSCs to OFFLINE + +Added `mute_alarms` and `mute_duration` parameters to the `set_summary_state` script +configuration. +`mute_alarms` defaults to `False`. +`mute_duration` defaults to `30 minutes`. + +E.g. + data: + - + - MTMount + - Offline + mute_alarms: true + + or + + data: + - + - MTMount + - Offline + mute_alarms: true + mute_duration: 60.0 + +When `mute_alarms` is enabled and a component is transitioned to OFFLINE, related watcher +alarms are temporarily muted for the specified duration, defaulting to 30 minutes. + +Muting is applied only to components transitioning to OFFLINE state. diff --git a/python/lsst/ts/standardscripts/set_summary_state.py b/python/lsst/ts/standardscripts/set_summary_state.py index 69d343a12..dd282d631 100755 --- a/python/lsst/ts/standardscripts/set_summary_state.py +++ b/python/lsst/ts/standardscripts/set_summary_state.py @@ -33,6 +33,7 @@ from lsst.ts.standardscripts.utils import name_to_name_index, WildcardIndexError from lsst.ts.standardscripts.utils import find_running_instances +from lsst.ts.xml.enums.Watcher import AlarmSeverity class SetSummaryState(salobj.BaseScript): @@ -66,6 +67,8 @@ def __init__(self, index): # make it generous enough to handle any CSC self.cmd_timeout = 60 + self.watcher = None + @classmethod def get_schema(cls): schema_yaml = """ @@ -88,8 +91,18 @@ def get_schema(cls): maxItems: 3 items: type: string + mute_alarms: + description: If true, temporarily mute watcher alarms for components being sent to Offline. + type: boolean + default: false + mute_duration: + description: Duration in minutes to mute the alarms. Default is 30.0 minutes.
+ type: number + minimum: 0 + default: 30.0 required: [data] additionalProperties: false + """ return yaml.safe_load(schema_yaml) @@ -189,6 +202,13 @@ async def configure(self, config): self.nameind_state_override = nameind_state_override self.remotes = remotes + self.mute_alarms = getattr(config, "mute_alarms", False) + self.mute_duration = getattr(config, "mute_duration", 30.0) + + if self.mute_alarms and self.watcher is None: + self.watcher = salobj.Remote(self.domain, "Watcher") + await self.watcher.start_task + def set_metadata(self, metadata): """Compute estimated duration. @@ -213,8 +233,24 @@ async def run(self): for name_index, state, override in self.nameind_state_override: name, index = name_index - await self.checkpoint(f"set {name}:{index}") remote = self.remotes[(name, index)] + if self.mute_alarms and state == salobj.State.OFFLINE: + self.log.info( + f"Muting alarms for (Enabled|Heartbeat).{name}:{index} Severity " + f"{AlarmSeverity.CRITICAL.name} for {self.mute_duration} minutes" + ) + try: + alarm_name_pattern = rf"^(Enabled|Heartbeat)\.{name}:{index}" + await self.watcher.cmd_mute.set_start( + name=alarm_name_pattern, + duration=self.mute_duration * 60, # Convert to seconds + severity=AlarmSeverity.CRITICAL, + mutedBy="set_summary_state script", + ) + except Exception as e: + self.log.warning(f"Failed to mute alarms for {name}:{index}: {e}") + + await self.checkpoint(f"set {name}:{index}") await salobj.set_summary_state( remote=remote, state=state, override=override, timeout=self.cmd_timeout ) diff --git a/tests/test_set_summary_state.py b/tests/test_set_summary_state.py index c9d56402a..8c0dc555c 100644 --- a/tests/test_set_summary_state.py +++ b/tests/test_set_summary_state.py @@ -29,6 +29,7 @@ import pytest from lsst.ts import salobj, standardscripts from lsst.ts.xml.enums.Script import ScriptState +from lsst.ts.xml.enums.Watcher import AlarmSeverity random.seed(47) # for set_random_lsst_dds_partition_prefix @@ -296,6 +297,78 @@ async def 
test_configure_wildcard_index_salobj(self): await self.run_configure_wildcard_index_test() + async def test_mute_alarms_when_offline(self): + """Test that alarms are muted when CSCs are set to OFFLINE with + mute_alarms=True.""" + async with self.make_script(): + self.script.watcher = unittest.mock.AsyncMock() + + await self.add_test_cscs(initial_state=salobj.State.ENABLED) + await self.add_test_cscs(initial_state=salobj.State.ENABLED) + await self.add_test_cscs(initial_state=salobj.State.ENABLED) + await self.add_test_cscs(initial_state=salobj.State.ENABLED) + + controllers = self.controllers + csc_info = [] + for controller in controllers: + name = controller.salinfo.name + index = controller.salinfo.index + name_ind = f"{name}:{index}" + csc_info.append((controller, name, index, name_ind)) + + offline_cscs = [csc_info[0][0], csc_info[2][0]] + + config_data = [] + for controller, name, index, name_ind in csc_info: + if controller in offline_cscs: + config_data.append((name_ind, "OFFLINE")) + else: + config_data.append((name_ind, "STANDBY")) + + await self.configure_script( + data=config_data, mute_alarms=True, mute_duration=31.0 + ) + + await self.run_script() + + expected_mute_calls = [ + mock.call( + name=rf"^(Enabled|Heartbeat)\.{name}:{index}", + duration=1860.0, # mute_duration (31.0 min) * 60 s/min + severity=AlarmSeverity.CRITICAL, + mutedBy="set_summary_state script", + ) + for controller, name, index, name_ind in csc_info + if controller in offline_cscs + ] + + self.script.watcher.cmd_mute.set_start.assert_has_awaits( + expected_mute_calls, any_order=True + ) + + expected_mute_calls_count = len(offline_cscs) + actual_mute_calls_count = self.script.watcher.cmd_mute.set_start.await_count + self.assertEqual( + actual_mute_calls_count, + expected_mute_calls_count, + f"Expected {expected_mute_calls_count} mute command(s), but got {actual_mute_calls_count}", + ) + + # Verify that CSCs have transitioned to the correct states + for controller, name, index, name_ind in
csc_info: + expected_state = ( + salobj.State.OFFLINE + if controller in offline_cscs + else salobj.State.STANDBY + ) + actual_state = controller.evt_summaryState.data.summaryState + self.assertEqual( + actual_state, + expected_state, + f"CSC {name_ind} expected to be in state {expected_state.name}, but found " + f"{actual_state.name}", + ) + if __name__ == "__main__": unittest.main()