Skip to content

Commit

Permalink
Revert "Use vendor customizable fan speed threshold checks (#378)"
Browse files Browse the repository at this point in the history
This reverts commit 6064369.
  • Loading branch information
yxieca committed Oct 18, 2023
1 parent 6064369 commit 2bb8e6b
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 50 deletions.
79 changes: 49 additions & 30 deletions sonic-thermalctld/scripts/thermalctld
Original file line number Diff line number Diff line change
Expand Up @@ -119,35 +119,57 @@ class FanStatus(logger.Logger):
self.status = status
return True

def set_under_speed(self, is_under_speed):
def _check_speed_value_available(self, speed, target_speed, tolerance, current_status):
if speed == NOT_AVAILABLE or target_speed == NOT_AVAILABLE or tolerance == NOT_AVAILABLE:
if isinstance(tolerance, int) and (tolerance > 100 or tolerance < 0):
self.log_warning('Invalid tolerance value: {}'.format(tolerance))
return False

if current_status is True:
self.log_warning('Fan speed or target_speed or tolerance became unavailable, '
'speed={}, target_speed={}, tolerance={}'.format(speed, target_speed, tolerance))
return False
return True

def set_under_speed(self, speed, target_speed, tolerance):
"""
Set and cache Fan under speed status
:param is_under_speed: Fan under speed threshold status
:param speed: Fan speed
:param target_speed: Fan target speed
:param tolerance: Threshold between Fan speed and target speed
:return: True if status changed else False
"""
if is_under_speed == NOT_AVAILABLE:
if self.under_speed:
self.log_warning('Fan under speed threshold check became unavailable')
is_under_speed = False
if not self._check_speed_value_available(speed, target_speed, tolerance, self.under_speed):
old_status = self.under_speed
self.under_speed = False
return old_status != self.under_speed

old_status = self.under_speed
self.under_speed = is_under_speed
return old_status != self.under_speed
status = speed < target_speed * (1 - float(tolerance) / 100)
if status == self.under_speed:
return False

def set_over_speed(self, is_over_speed):
self.under_speed = status
return True

def set_over_speed(self, speed, target_speed, tolerance):
"""
Set and cache Fan over speed status
:param is_over_speed: Fan over speed threshold status
:param speed: Fan speed
:param target_speed: Fan target speed
:param tolerance: Threshold between Fan speed and target speed
:return: True if status changed else False
"""
if is_over_speed == NOT_AVAILABLE:
if self.over_speed:
self.log_warning('Fan over speed threshold check became unavailable')
is_over_speed = False
if not self._check_speed_value_available(speed, target_speed, tolerance, self.over_speed):
old_status = self.over_speed
self.over_speed = False
return old_status != self.over_speed

old_status = self.over_speed
self.over_speed = is_over_speed
return old_status != self.over_speed
status = speed > target_speed * (1 + float(tolerance) / 100)
if status == self.over_speed:
return False

self.over_speed = status
return True

def is_ok(self):
"""
Expand Down Expand Up @@ -293,18 +315,16 @@ class FanUpdater(logger.Logger):
fan_status = self.fan_status_dict[fan_name]

speed = NOT_AVAILABLE
speed_tolerance = NOT_AVAILABLE
speed_target = NOT_AVAILABLE
is_under_speed = NOT_AVAILABLE
is_over_speed = NOT_AVAILABLE
fan_fault_status = NOT_AVAILABLE
fan_direction = NOT_AVAILABLE
is_replaceable = try_get(fan.is_replaceable, False)
presence = try_get(fan.get_presence, False)
if presence:
speed = try_get(fan.get_speed)
speed_tolerance = try_get(fan.get_speed_tolerance)
speed_target = try_get(fan.get_target_speed)
is_under_speed = try_get(fan.is_under_speed)
is_over_speed = try_get(fan.is_over_speed)
fan_fault_status = try_get(fan.get_status, False)
fan_direction = try_get(fan.get_direction)

Expand All @@ -324,20 +344,20 @@ class FanUpdater(logger.Logger):
'Fan fault warning: {} is broken'.format(fan_name)
)

if presence and fan_status.set_under_speed(is_under_speed):
if presence and fan_status.set_under_speed(speed, speed_target, speed_tolerance):
set_led = True
self._log_on_status_changed(not fan_status.under_speed,
'Fan low speed warning cleared: {} speed is back to normal'.format(fan_name),
'Fan low speed warning: {} current speed={}, target speed={}'.
format(fan_name, speed, speed_target)
'Fan low speed warning: {} current speed={}, target speed={}, tolerance={}'.
format(fan_name, speed, speed_target, speed_tolerance)
)

if presence and fan_status.set_over_speed(is_over_speed):
if presence and fan_status.set_over_speed(speed, speed_target, speed_tolerance):
set_led = True
self._log_on_status_changed(not fan_status.over_speed,
'Fan high speed warning cleared: {} speed is back to normal'.format(fan_name),
'Fan high speed warning: {} current speed={}, target speed={}'.
format(fan_name, speed, speed_target)
'Fan high speed warning: {} target speed={}, current speed={}, tolerance={}'.
format(fan_name, speed_target, speed, speed_tolerance)
)

# TODO: handle invalid fan direction
Expand All @@ -358,9 +378,8 @@ class FanUpdater(logger.Logger):
('status', str(fan_fault_status)),
('direction', str(fan_direction)),
('speed', str(speed)),
('speed_tolerance', str(speed_tolerance)),
('speed_target', str(speed_target)),
('is_under_speed', str(is_under_speed)),
('is_over_speed', str(is_over_speed)),
('is_replaceable', str(is_replaceable)),
('timestamp', datetime.now().strftime('%Y%m%d %H:%M:%S'))
])
Expand Down
68 changes: 48 additions & 20 deletions sonic-thermalctld/tests/test_thermalctld.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,30 @@ class TestFanStatus(object):
"""
Test cases to cover functionality in FanStatus class
"""
def test_check_speed_value_available(self):
fan_status = thermalctld.FanStatus()

ret = fan_status._check_speed_value_available(30, 32, 5, True)
assert ret == True
assert fan_status.log_warning.call_count == 0

ret = fan_status._check_speed_value_available(thermalctld.NOT_AVAILABLE, 32, 105, True)
assert ret == False
assert fan_status.log_warning.call_count == 1
fan_status.log_warning.assert_called_with('Invalid tolerance value: 105')

# Reset
fan_status.log_warning.reset_mock()

ret = fan_status._check_speed_value_available(thermalctld.NOT_AVAILABLE, 32, 5, False)
assert ret == False
assert fan_status.log_warning.call_count == 0

ret = fan_status._check_speed_value_available(thermalctld.NOT_AVAILABLE, 32, 5, True)
assert ret == False
assert fan_status.log_warning.call_count == 1
fan_status.log_warning.assert_called_with('Fan speed or target_speed or tolerance became unavailable, speed=N/A, target_speed=32, tolerance=5')

def test_set_presence(self):
fan_status = thermalctld.FanStatus()
ret = fan_status.set_presence(True)
Expand All @@ -80,48 +104,52 @@ def test_set_presence(self):

def test_set_under_speed(self):
fan_status = thermalctld.FanStatus()
ret = fan_status.set_under_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE)
assert not ret

ret = fan_status.set_under_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, 0)
assert not ret

ret = fan_status.set_under_speed(False)
ret = fan_status.set_under_speed(thermalctld.NOT_AVAILABLE, 0, 0)
assert not ret

ret = fan_status.set_under_speed(True)
ret = fan_status.set_under_speed(0, 0, 0)
assert not ret

ret = fan_status.set_under_speed(80, 100, 19)
assert ret
assert fan_status.under_speed
assert not fan_status.is_ok()

ret = fan_status.set_under_speed(True)
assert not ret

ret = fan_status.set_under_speed(False)
ret = fan_status.set_under_speed(81, 100, 19)
assert ret
assert not fan_status.under_speed
assert fan_status.is_ok()

ret = fan_status.set_under_speed(False)
assert not ret

def test_set_over_speed(self):
fan_status = thermalctld.FanStatus()
ret = fan_status.set_over_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE)
assert not ret

ret = fan_status.set_over_speed(thermalctld.NOT_AVAILABLE, thermalctld.NOT_AVAILABLE, 0)
assert not ret

ret = fan_status.set_over_speed(False)
ret = fan_status.set_over_speed(thermalctld.NOT_AVAILABLE, 0, 0)
assert not ret

ret = fan_status.set_over_speed(True)
ret = fan_status.set_over_speed(0, 0, 0)
assert not ret

ret = fan_status.set_over_speed(120, 100, 19)
assert ret
assert fan_status.over_speed
assert not fan_status.is_ok()

ret = fan_status.set_over_speed(True)
assert not ret

ret = fan_status.set_over_speed(False)
ret = fan_status.set_over_speed(120, 100, 21)
assert ret
assert not fan_status.over_speed
assert fan_status.is_ok()

ret = fan_status.set_over_speed(False)
assert not ret


class TestFanUpdater(object):
"""
Expand Down Expand Up @@ -223,7 +251,7 @@ def test_fan_under_speed(self):
fan_list = chassis.get_all_fans()
assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED
assert fan_updater.log_warning.call_count == 1
fan_updater.log_warning.assert_called_with('Fan low speed warning: FanDrawer 0 fan 1 current speed=1, target speed=2')
fan_updater.log_warning.assert_called_with('Fan low speed warning: FanDrawer 0 fan 1 current speed=1, target speed=2, tolerance=0')

fan_list[0].make_normal_speed()
fan_updater.update()
Expand All @@ -239,7 +267,7 @@ def test_fan_over_speed(self):
fan_list = chassis.get_all_fans()
assert fan_list[0].get_status_led() == MockFan.STATUS_LED_COLOR_RED
assert fan_updater.log_warning.call_count == 1
fan_updater.log_warning.assert_called_with('Fan high speed warning: FanDrawer 0 fan 1 current speed=2, target speed=1')
fan_updater.log_warning.assert_called_with('Fan high speed warning: FanDrawer 0 fan 1 target speed=1, current speed=2, tolerance=0')

fan_list[0].make_normal_speed()
fan_updater.update()
Expand Down

0 comments on commit 2bb8e6b

Please sign in to comment.