Skip to content

Commit

Permalink
Add watchdog timeout event test (sonic-net#13468)
Browse files Browse the repository at this point in the history
### Description of PR
Summary:
Fixes # (issue)26636373

### Approach
#### What is the motivation for this PR?

Add a test for the watchdog timeout event.

#### How did you do it?

Create a mock test service that sleeps and therefore never sends the watchdog signal. This generates a watchdog timeout event.

#### How did you verify/test it?

Manual test/Pipeline
  • Loading branch information
zbud-msft authored Jul 10, 2024
1 parent eae56a9 commit 01ab063
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 0 deletions.
15 changes: 15 additions & 0 deletions tests/telemetry/events/event_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,21 @@
PUBLISHED = 1


def add_test_watchdog_timeout_service(duthost):
    """Install and start the mock watchdog-timeout systemd unit on the DUT.

    Copies the test unit file into /etc/systemd/system/, reloads systemd so
    the new unit is picked up, and then starts it.

    Args:
        duthost: DUT host object exposing ``copy`` and ``shell``.
    """
    logger.info("Adding mock watchdog.service to systemd")
    duthost.copy(
        src="telemetry/events/events_data/test-watchdog-timeout.service",
        dest="/etc/systemd/system/",
    )
    # Reload first so systemd knows about the freshly copied unit file.
    setup_commands = (
        "systemctl daemon-reload",
        "systemctl start test-watchdog-timeout.service",
    )
    for command in setup_commands:
        duthost.shell(command)


def delete_test_watchdog_timeout_service(duthost):
    """Stop and remove the mock watchdog-timeout systemd unit from the DUT.

    Stopping the unit and deleting its file are best-effort (errors are
    ignored); the subsequent systemd reload and failed-state reset are not.

    Args:
        duthost: DUT host object exposing ``shell``.
    """
    logger.info("Deleting mock test-watchdog-timeout.service")
    # Errors are ignored for these two commands.
    best_effort_commands = (
        "systemctl stop test-watchdog-timeout.service",
        "rm /etc/systemd/system/test-watchdog-timeout.service",
    )
    for command in best_effort_commands:
        duthost.shell(command, module_ignore_errors=True)
    # Refresh systemd's view of the unit files, then clear failed-unit state.
    for command in ("systemctl daemon-reload", "systemctl reset-failed"):
        duthost.shell(command)


def backup_monit_config(duthost):
logger.info("Backing up monit config files")
duthost.shell("cp -f /etc/monit/monitrc ~/")
Expand Down
13 changes: 13 additions & 0 deletions tests/telemetry/events/events_data/test-watchdog-timeout.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[Unit]
Description=Test Watchdog Timeout

[Service]
Type=simple
# This service sleeps for 2 minutes and therefore never sends the watchdog signal within the required 1-minute window (WatchdogSec).
# When the watchdog times out, SIGABRT is sent, which would normally crash the bash process and dump core.
# The SIGABRT is trapped (ignored) so that no core dump is produced.
ExecStart=/bin/bash -c 'trap "" SIGABRT; sleep 120'
WatchdogSec=60s

[Install]
WantedBy=multi-user.target
9 changes: 9 additions & 0 deletions tests/telemetry/events/host_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import time
from run_events_test import run_test
from event_utils import backup_monit_config, customize_monit_config, restore_monit_config
from event_utils import add_test_watchdog_timeout_service, delete_test_watchdog_timeout_service
from telemetry_utils import trigger_logger
from tests.common.helpers.dut_utils import is_container_running
from tests.common.utilities import wait_until
Expand Down Expand Up @@ -42,6 +43,14 @@ def test_event(duthost, gnxi_path, ptfhost, data_dir, validate_yang):
"event_down_ctr.json", "sonic-events-host:event-down-ctr", tag, False)
finally:
restore_monit_config(duthost)
add_test_watchdog_timeout_service(duthost)
try:
# We need to allot a flat 60 seconds for the watchdog timeout to fire, since the timer is set to 60 seconds.
# With a base limit of 30 seconds, we will use 90 seconds.
run_test(duthost, gnxi_path, ptfhost, data_dir, validate_yang, None,
"watchdog_timeout.json", "sonic-events-host:watchdog-timeout", tag, False, 90)
finally:
delete_test_watchdog_timeout_service(duthost)


def trigger_mem_threshold_exceeded_alert(duthost):
Expand Down

0 comments on commit 01ab063

Please sign in to comment.