From 60e7224c7d0d398a8bbb055796d19e0a556f1916 Mon Sep 17 00:00:00 2001 From: Patrick MacArthur Date: Fri, 6 Dec 2024 13:10:54 -0500 Subject: [PATCH] thermalctld: Add support for fans on non-CPU modules (#555) * thermalctld: Add support for fans on non-CPU modules * Add module fan to unit tests --- sonic-thermalctld/scripts/thermalctld | 37 +++++++++++++++------ sonic-thermalctld/tests/mock_platform.py | 2 ++ sonic-thermalctld/tests/test_thermalctld.py | 22 +++++++++++- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 2739bd926..ce4972339 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -5,6 +5,7 @@ Thermal control daemon for SONiC """ +from enum import Enum, auto import signal import sys import threading @@ -61,18 +62,23 @@ def update_entity_info(table, parent_name, key, device, device_index): table.set(key, fvs) +class FanType(Enum): + DRAWER = auto() + PSU = auto() + MODULE = auto() + class FanStatus(logger.Logger): absent_fan_count = 0 faulty_fan_count = 0 - def __init__(self, fan=None, is_psu_fan=False): + def __init__(self, fan=None, fan_type=FanType.DRAWER): """ Initializer of FanStatus """ super(FanStatus, self).__init__(SYSLOG_IDENTIFIER) self.fan = fan - self.is_psu_fan = is_psu_fan + self.fan_type = fan_type self.presence = True self.status = True self.under_speed = False @@ -95,7 +101,7 @@ class FanStatus(logger.Logger): :param presence: Fan presence status :return: True if status changed else False """ - if not presence and not self.is_psu_fan: + if not presence and self.fan_type == FanType.DRAWER: FanStatus.absent_fan_count += 1 if presence == self.presence: @@ -228,16 +234,25 @@ class FanUpdater(logger.Logger): if self.task_stopping_event.is_set(): return try: - self._refresh_fan_status(drawer, drawer_index, fan, fan_index) + self._refresh_fan_status(drawer, drawer_index, fan, fan_index, FanType.DRAWER) except Exception as e: self.log_warning('Failed to update fan status - {}'.format(repr(e))) + for module_index, module in enumerate(self.chassis.get_all_modules()): + for fan_index, fan in enumerate(module.get_all_fans()): + if self.task_stopping_event.is_set(): + return + try: + self._refresh_fan_status(module, module_index, fan, fan_index, FanType.MODULE) + except Exception as e: + self.log_warning('Failed to update module fan status - {}'.format(repr(e))) + for psu_index, psu in enumerate(self.chassis.get_all_psus()): for fan_index, fan in enumerate(psu.get_all_fans()): if self.task_stopping_event.is_set(): return try: - self._refresh_fan_status(psu, psu_index, fan, fan_index, True) + self._refresh_fan_status(psu, psu_index, fan, fan_index, FanType.PSU) except Exception as e: self.log_warning('Failed to update PSU fan status - {}'.format(repr(e))) @@ -270,7 +285,7 @@ class FanUpdater(logger.Logger): self.drawer_table.set(drawer_name, fvs) - def _refresh_fan_status(self, parent, parent_index, fan, fan_index, is_psu_fan=False): + def _refresh_fan_status(self, parent, parent_index, fan, fan_index, fan_type=FanType.DRAWER): """ Get Fan status by platform API and write to database for a given Fan :param parent: Parent device of this fan @@ -280,15 +295,17 @@ class FanUpdater(logger.Logger): :param name_prefix: name prefix of Fan object if Fan.get_name not presented :return: """ - drawer_name = NOT_AVAILABLE if is_psu_fan else str(try_get(parent.get_name)) - if is_psu_fan: + drawer_name = NOT_AVAILABLE if fan_type != FanType.DRAWER else str(try_get(parent.get_name)) + if fan_type == FanType.PSU: parent_name = 'PSU {}'.format(parent_index + 1) + elif fan_type == FanType.MODULE: + parent_name = 'Module {}'.format(parent_index + 1) else: parent_name = drawer_name if drawer_name != NOT_AVAILABLE else CHASSIS_INFO_KEY fan_name = try_get(fan.get_name, '{} fan {}'.format(parent_name, fan_index + 1)) update_entity_info(self.phy_entity_table, parent_name, fan_name, fan, fan_index + 1) if fan_name not in self.fan_status_dict: - self.fan_status_dict[fan_name] = FanStatus(fan, is_psu_fan) + self.fan_status_dict[fan_name] = FanStatus(fan, fan_type) fan_status = self.fan_status_dict[fan_name] @@ -342,7 +359,7 @@ class FanUpdater(logger.Logger): # We don't set PSU led here, PSU led will be handled in psud if set_led: - if not is_psu_fan: + if fan_type == FanType.DRAWER: self._set_fan_led(parent, fan, fan_name, fan_status) if fan_fault_status != NOT_AVAILABLE: diff --git a/sonic-thermalctld/tests/mock_platform.py b/sonic-thermalctld/tests/mock_platform.py index 1b7a43039..120096753 100644 --- a/sonic-thermalctld/tests/mock_platform.py +++ b/sonic-thermalctld/tests/mock_platform.py @@ -401,8 +401,10 @@ def make_module_thermal(self): sfp._thermal_list.append(MockThermal()) psu = MockPsu() psu._thermal_list.append(MockThermal()) + fan = MockFan() module._sfp_list.append(sfp) module._psu_list.append(psu) + module._fan_list.append(fan) module._thermal_list.append(MockThermal()) def is_modular_chassis(self): diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index 9f16b4111..94630d8d8 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -24,7 +24,7 @@ from sonic_py_common import daemon_base -from .mock_platform import MockChassis, MockFan, MockPsu, MockSfp, MockThermal +from .mock_platform import MockChassis, MockFan, MockModule, MockPsu, MockSfp, MockThermal from .mock_swsscommon import Table daemon_base.db_connect = mock.MagicMock() @@ -267,6 +267,26 @@ def test_update_psu_fans(self): else: fan_updater.log_warning.assert_called_with("Failed to update PSU fan status - Exception('Test message',)") + def test_update_module_fans(self): + chassis = MockChassis() + module = MockModule() + mock_fan = MockFan() + chassis.set_modular_chassis(True) + module._fan_list.append(mock_fan) + chassis._module_list.append(module) + fan_updater = thermalctld.FanUpdater(chassis, multiprocessing.Event()) + fan_updater.update() + assert fan_updater.log_warning.call_count == 0 + + fan_updater._refresh_fan_status = mock.MagicMock(side_effect=Exception("Test message")) + fan_updater.update() + assert fan_updater.log_warning.call_count == 1 + + # TODO: Clean this up once we no longer need to support Python 2 + if sys.version_info.major == 3: + fan_updater.log_warning.assert_called_with("Failed to update module fan status - Exception('Test message')") + else: + fan_updater.log_warning.assert_called_with("Failed to update module fan status - Exception('Test message',)") class TestThermalMonitor(object): """