Skip to content

Commit

Permalink
Merge pull request #239 from lsst-ts/tickets/DM-47086
Browse files Browse the repository at this point in the history
tickets/DM-47086: Add option to disable watcher alarms in `set_summary_state` scripts when sending a component to Offline.
  • Loading branch information
iglesu authored Nov 1, 2024
2 parents 60074b0 + 023da16 commit cee3d75
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 1 deletion.
27 changes: 27 additions & 0 deletions doc/news/DM-47086.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Add option to mute watcher alarms when setting CSCs to OFFLINE

Added `mute_alarms` and `mute_duration` parameters to the `set_summary_state` script
configuration.
`mute_alarms` is a boolean and defaults to `False`.
`mute_duration` is given in minutes and defaults to `30.0`.

E.g.::

    data:
      -
        - MTMount
        - Offline
    mute_alarms: true

or::

    data:
      -
        - MTMount
        - Offline
    mute_alarms: true
    mute_duration: 60.0

When `mute_alarms` is enabled and a component is transitioned to OFFLINE, the watcher's
`Enabled` and `Heartbeat` alarms for that component are temporarily muted for
`mute_duration` minutes (30 minutes by default).

Muting is applied only to components transitioning to the OFFLINE state; components sent
to any other state are unaffected.
38 changes: 37 additions & 1 deletion python/lsst/ts/standardscripts/set_summary_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from lsst.ts.standardscripts.utils import name_to_name_index, WildcardIndexError

from lsst.ts.standardscripts.utils import find_running_instances
from lsst.ts.xml.enums.Watcher import AlarmSeverity


class SetSummaryState(salobj.BaseScript):
Expand Down Expand Up @@ -66,6 +67,8 @@ def __init__(self, index):
# make it generous enough to handle any CSC
self.cmd_timeout = 60

self.watcher = None

@classmethod
def get_schema(cls):
schema_yaml = """
Expand All @@ -88,8 +91,18 @@ def get_schema(cls):
maxItems: 3
items:
type: string
mute_alarms:
description: If true, temporarily mute watcher alarms for components being sent to Offline.
type: boolean
default: false
mute_duration:
description: Duration in minutes to mute the alarms. Default is 30.0 minutes.
type: number
minimum: 0
default: 30.0
required: [data]
additionalProperties: false
"""
return yaml.safe_load(schema_yaml)

Expand Down Expand Up @@ -189,6 +202,13 @@ async def configure(self, config):
self.nameind_state_override = nameind_state_override
self.remotes = remotes

self.mute_alarms = getattr(config, "mute_alarms", False)
self.mute_duration = getattr(config, "mute_duration", 30.0)

if self.mute_alarms and self.watcher is None:
self.watcher = salobj.Remote(self.domain, "Watcher")
await self.watcher.start_task

def set_metadata(self, metadata):
"""Compute estimated duration.
Expand All @@ -213,8 +233,24 @@ async def run(self):

for name_index, state, override in self.nameind_state_override:
name, index = name_index
await self.checkpoint(f"set {name}:{index}")
remote = self.remotes[(name, index)]
if self.mute_alarms and state == salobj.State.OFFLINE:
self.log.info(
f"Muting alarms for (Enabled|Heartbeat).{name}:{index} Severity "
f"{AlarmSeverity.CRITICAL.name} for {self.mute_duration} minutes"
)
try:
alarm_name_pattern = rf"^(Enabled|Heartbeat)\.{name}:{index}"
await self.watcher.cmd_mute.set_start(
name=alarm_name_pattern,
duration=self.mute_duration * 60, # Convert to seconds
severity=AlarmSeverity.CRITICAL,
mutedBy="set_summary_state script",
)
except Exception as e:
self.log.warning(f"Failed to mute alarms for {name}:{index}: {e}")

await self.checkpoint(f"set {name}:{index}")
await salobj.set_summary_state(
remote=remote, state=state, override=override, timeout=self.cmd_timeout
)
73 changes: 73 additions & 0 deletions tests/test_set_summary_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import pytest
from lsst.ts import salobj, standardscripts
from lsst.ts.xml.enums.Script import ScriptState
from lsst.ts.xml.enums.Watcher import AlarmSeverity

random.seed(47) # for set_random_lsst_dds_partition_prefix

Expand Down Expand Up @@ -296,6 +297,78 @@ async def test_configure_wildcard_index_salobj(self):

await self.run_configure_wildcard_index_test()

async def test_mute_alarms_when_offline(self):
    """Check that watcher alarms are muted for CSCs sent to OFFLINE.

    Test CSCs are added in the ENABLED state. The first and third
    controllers are configured to go to OFFLINE and the remaining ones
    to STANDBY, with ``mute_alarms=True`` and ``mute_duration=31.0``.
    The script's ``watcher`` attribute is replaced by a mock so the mute
    commands it receives can be inspected.
    """
    async with self.make_script():
        # Stand-in for the Watcher remote; it records every awaited call.
        self.script.watcher = unittest.mock.AsyncMock()

        for _ in range(4):
            await self.add_test_cscs(initial_state=salobj.State.ENABLED)

        # One record per controller: (controller, name, index, "name:index").
        csc_info = [
            (controller, name, index, f"{name}:{index}")
            for controller, name, index in (
                (ctrl, ctrl.salinfo.name, ctrl.salinfo.index)
                for ctrl in self.controllers
            )
        ]

        # Controllers that will be commanded to OFFLINE.
        offline_cscs = [csc_info[position][0] for position in (0, 2)]

        def target_state(controller):
            """Return the state this test requests for ``controller``."""
            if controller in offline_cscs:
                return salobj.State.OFFLINE
            return salobj.State.STANDBY

        config_data = [
            (name_ind, target_state(controller).name)
            for controller, _name, _index, name_ind in csc_info
        ]

        await self.configure_script(
            data=config_data, mute_alarms=True, mute_duration=31.0
        )

        await self.run_script()

        # One mute command is expected per OFFLINE-bound CSC.
        expected_mute_calls = [
            mock.call(
                name=rf"^(Enabled|Heartbeat)\.{name}:{index}",
                duration=1860.0,  # mute_duration (31.0 minutes) * 60 seconds
                severity=AlarmSeverity.CRITICAL,
                mutedBy="set_summary_state script",
            )
            for controller, name, index, _name_ind in csc_info
            if controller in offline_cscs
        ]

        mute_command = self.script.watcher.cmd_mute.set_start
        mute_command.assert_has_awaits(expected_mute_calls, any_order=True)

        # assert_has_awaits tolerates extra awaits, so pin the exact count.
        expected_count = len(offline_cscs)
        actual_count = mute_command.await_count
        self.assertEqual(
            actual_count,
            expected_count,
            f"Expected {expected_count} mute command(s), but got {actual_count}",
        )

        # Every CSC must have ended up in the state it was configured for.
        for controller, _name, _index, name_ind in csc_info:
            expected_state = target_state(controller)
            actual_state = controller.evt_summaryState.data.summaryState
            self.assertEqual(
                actual_state,
                expected_state,
                f"CSC {name_ind} expected to be in state {expected_state.name}, but found "
                f"{actual_state.name}",
            )


if __name__ == "__main__":
unittest.main()

0 comments on commit cee3d75

Please sign in to comment.