From cbb8aa87ca48d60bba16c43dc95a827ffa460151 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Wed, 5 Jun 2024 10:23:55 +0200 Subject: [PATCH 1/4] requirements: Adding 'packaging' python explicit dependency Commit 938cbde536a99395dacc9d7038b0b646dc878b48 was adding 'packaging' dependency that wasn't added in the requirements. This commit is also regenerating hashes done with "make regen_hashes". Signed-off-by: Erwan Velu --- requirements/base.in | 1 + requirements/base.txt | 21 ++++++++++++++++++++- requirements/test.txt | 17 +++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/requirements/base.in b/requirements/base.in index aa38220..9faafdd 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -3,3 +3,4 @@ numpy matplotlib redfish pycairo +packaging diff --git a/requirements/base.txt b/requirements/base.txt index 3cf8304..ed6aac5 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -399,7 +399,9 @@ numpy==1.26.4 \ packaging==24.0 \ --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 - # via matplotlib + # via + # -r requirements/base.in + # matplotlib pillow==10.3.0 \ --hash=sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c \ --hash=sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2 \ @@ -475,6 +477,23 @@ ply==3.11 \ --hash=sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3 \ --hash=sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce # via jsonpath-rw +pycairo==1.26.0 \ + --hash=sha256:1d54e28170a5e790269d9db4c195cca5152ff018ba7e330d0ed05d86ccc2ea7d \ + --hash=sha256:20a31af89d92ffd5fc60c08e65ff649f16e18621a14a40dbdb049fc74942d7a9 \ + --hash=sha256:2dddd0a874fbddb21e14acd9b955881ee1dc6e63b9c549a192d613a907f9cbeb \ + --hash=sha256:3e4e18ea03122e60abe3eb611e2849859cc950083ff85d8369328eadf3df63f5 \ + 
--hash=sha256:5986b8da3e7de7ab931d7ad527938df38f75d3a3bdea2b515c786c5ca2c5093c \ + --hash=sha256:675578bc6d62d15ff8669f264783efc9c8c73e3a6f564b294a70fb45a2f78667 \ + --hash=sha256:696ba8024d2827e66e088a6e05a3b0aea30d289476bcb2ca47c9670d40900a50 \ + --hash=sha256:8616408ae93de4824a3777ec532ea75643e4bf74e49d601062c0b1788180c962 \ + --hash=sha256:9fa51168010e2dfb45499df071fca2d921893f724646f3454951000a7ad0cabb \ + --hash=sha256:a611e4d82ad8470138bb46d465d47e8db826d9d80b6a520ccd83ee007f2073e4 \ + --hash=sha256:a8f3b567ba2ad55624a809823ccf75aff8d768c20216cb5888365f6fc695c1d2 \ + --hash=sha256:aac447b423b33b64119ecdd1ffebf9163b07f5401c5da50c707197efdd1c918a \ + --hash=sha256:b6690a00fb225c19f42d76660e676aba7ae7cb18f3632cb02bce7f0d9b9c3800 \ + --hash=sha256:d374d9ec6d2f791bf57105d87a9028db1ef2b687848f64a524e447033eae7229 \ + --hash=sha256:d63929ab5a2f890a333f2f2f51de9f1c9fe20d1bddc982c2ca577b737448d72f + # via -r requirements/base.in pyparsing==3.1.2 \ --hash=sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad \ --hash=sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742 diff --git a/requirements/test.txt b/requirements/test.txt index 45eb20c..ccace88 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -648,6 +648,23 @@ ply==3.11 \ # via # -r requirements/base.txt # jsonpath-rw +pycairo==1.26.0 \ + --hash=sha256:1d54e28170a5e790269d9db4c195cca5152ff018ba7e330d0ed05d86ccc2ea7d \ + --hash=sha256:20a31af89d92ffd5fc60c08e65ff649f16e18621a14a40dbdb049fc74942d7a9 \ + --hash=sha256:2dddd0a874fbddb21e14acd9b955881ee1dc6e63b9c549a192d613a907f9cbeb \ + --hash=sha256:3e4e18ea03122e60abe3eb611e2849859cc950083ff85d8369328eadf3df63f5 \ + --hash=sha256:5986b8da3e7de7ab931d7ad527938df38f75d3a3bdea2b515c786c5ca2c5093c \ + --hash=sha256:675578bc6d62d15ff8669f264783efc9c8c73e3a6f564b294a70fb45a2f78667 \ + --hash=sha256:696ba8024d2827e66e088a6e05a3b0aea30d289476bcb2ca47c9670d40900a50 \ + 
--hash=sha256:8616408ae93de4824a3777ec532ea75643e4bf74e49d601062c0b1788180c962 \ + --hash=sha256:9fa51168010e2dfb45499df071fca2d921893f724646f3454951000a7ad0cabb \ + --hash=sha256:a611e4d82ad8470138bb46d465d47e8db826d9d80b6a520ccd83ee007f2073e4 \ + --hash=sha256:a8f3b567ba2ad55624a809823ccf75aff8d768c20216cb5888365f6fc695c1d2 \ + --hash=sha256:aac447b423b33b64119ecdd1ffebf9163b07f5401c5da50c707197efdd1c918a \ + --hash=sha256:b6690a00fb225c19f42d76660e676aba7ae7cb18f3632cb02bce7f0d9b9c3800 \ + --hash=sha256:d374d9ec6d2f791bf57105d87a9028db1ef2b687848f64a524e447033eae7229 \ + --hash=sha256:d63929ab5a2f890a333f2f2f51de9f1c9fe20d1bddc982c2ca577b737448d72f + # via -r requirements/base.txt pyparsing==3.1.2 \ --hash=sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad \ --hash=sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742 From 18f0deaef0ad44cd1424ff397334338719b882bc Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Tue, 4 Jun 2024 17:10:34 +0200 Subject: [PATCH 2/4] hwbench: Mocked BMC must provide realistic metrics With the current code, the mocked BMC was returning a new data structure at each read_*() call. Let's illustrate with the CPU temperature. At each read_thermals() call, the thermals look like the following: {'CPU': {'CPU1': Temperature(name='CPU1', unit='Celsius', values=[40], mean=[], min=[], max=[], stdev=[], samples=[])}} As per the monitoring logic, each call to read_thermals() must add a new metric in "values". After a monitoring sampling period, mean/min/max/stdev are computed. As values remained a single-value list, the mean/min/max/stdev were never computed, leading to empty lists. This commit ensures the mocked BMC uses the same logic as real drivers so the accumulation works. 
Signed-off-by: Erwan Velu --- hwbench/environment/vendors/mock.py | 38 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/hwbench/environment/vendors/mock.py b/hwbench/environment/vendors/mock.py index 4a68d06..5ba01f5 100644 --- a/hwbench/environment/vendors/mock.py +++ b/hwbench/environment/vendors/mock.py @@ -2,6 +2,7 @@ FanContext, MonitorMetric, Power, + PowerCategories, PowerContext, Temperature, ThermalContext, @@ -17,29 +18,56 @@ def read_thermals( self, thermals: dict[str, dict[str, Temperature]] = {} ) -> dict[str, dict[str, Temperature]]: # Let's add a faked thermal metric - thermals[str(ThermalContext.CPU)] = {"CPU1": Temperature("CPU1", 40)} + name = "CPU1" + if str(ThermalContext.CPU) not in thermals: + thermals[str(ThermalContext.CPU)] = {} + if name not in thermals[str(ThermalContext.CPU)]: + thermals[str(ThermalContext.CPU)][name] = Temperature(name) + + thermals[str(ThermalContext.CPU)][name].add(40) return thermals def read_fans( self, fans: dict[str, dict[str, MonitorMetric]] = {} ) -> dict[str, dict[str, MonitorMetric]]: # Let's add a faked fans metric - fans[str(FanContext.FAN)] = {"Fan1": MonitorMetric("Fan1", "RPM", 40)} + name = "Fan1" + if str(FanContext.FAN) not in fans: + fans[str(FanContext.FAN)] = {} + if name not in fans[str(FanContext.FAN)]: + fans[str(FanContext.FAN)][name] = MonitorMetric(name, "RPM") + + fans[str(FanContext.FAN)][name].add(40) return fans def read_power_consumption( self, power_consumption: dict[str, dict[str, Power]] = {} ) -> dict[str, dict[str, Power]]: # Let's add a faked power metric - power_consumption[str(PowerContext.BMC)] = {"Chassis": Power("Chassis", 125.0)} + name = str(PowerCategories.CHASSIS) + if str(PowerContext.BMC) not in power_consumption: + power_consumption[str(PowerContext.BMC)] = {} + if name not in power_consumption[str(PowerContext.BMC)]: + power_consumption[str(PowerContext.BMC)][name] = Power( + str(PowerCategories.CHASSIS) + ) + + 
power_consumption[str(PowerContext.BMC)][str(PowerCategories.CHASSIS)].add( + 125.0 + ) return power_consumption def read_power_supplies( self, power_supplies: dict[str, dict[str, Power]] = {} ) -> dict[str, dict[str, Power]]: # Let's add a faked power supplies - - power_supplies[str(PowerContext.BMC)] = {"PS1 status": Power("PS1", 125.0)} + status = "PS1 status" + name = "PS1" + if str(PowerContext.BMC) not in power_supplies: + power_supplies[str(PowerContext.BMC)] = {} + if status not in power_supplies[str(PowerContext.BMC)]: + power_supplies[str(PowerContext.BMC)][status] = Power(name) + power_supplies[str(PowerContext.BMC)][status].add(125) return power_supplies def connect_redfish(self): From 9ec4b5730996c38e62a7338903971072d2d3d727 Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Wed, 5 Jun 2024 10:03:21 +0200 Subject: [PATCH 3/4] hwbench/monitoring: Simplify addition of a new metric The current code was a large set of code that got copy/pasted across various functions and vendors. This generated lots of hard-to-read code, which is error-prone. This commit merges all the metric addition logic into BMC.add_monitoring_value() so it can be used across all vendors, including the mocked one. 
Signed-off-by: Erwan Velu --- hwbench/environment/vendors/dell/dell.py | 47 ++++++++--------- hwbench/environment/vendors/hpe/hpe.py | 64 ++++++++++++++---------- hwbench/environment/vendors/mock.py | 53 +++++++++++--------- hwbench/environment/vendors/vendor.py | 17 +++++++ 4 files changed, 107 insertions(+), 74 deletions(-) diff --git a/hwbench/environment/vendors/dell/dell.py b/hwbench/environment/vendors/dell/dell.py index eb135c4..d545692 100644 --- a/hwbench/environment/vendors/dell/dell.py +++ b/hwbench/environment/vendors/dell/dell.py @@ -1,4 +1,6 @@ +from typing import cast from ....bench.monitoring_structs import ( + MonitorMetric, Power, PowerCategories as PowerCat, PowerContext, @@ -19,11 +21,13 @@ def read_thermals( continue name = t["Name"].split("Temp")[0].strip() pc = t["PhysicalContext"] - if pc not in thermals: - thermals[pc] = {} - if t["Name"] not in thermals[pc]: - thermals[pc][t["Name"]] = Temperature(name) - thermals[pc][t["Name"]].add(t["ReadingCelsius"]) + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], thermals), + pc, + Temperature(name), + t["Name"], + t["ReadingCelsius"], + ) return thermals def get_power(self): @@ -43,28 +47,25 @@ def read_power_consumption( # ServerPwr.1.SCViewSledPwr is computed from other metrics # It includes the SLED power consumption + a mathematical portion of the chassis consumption # It's computed like : ServerPwr.1.SCViewSledPwr = PowerConsumedWatts + 'SC-BMC.1.ChassisInfraPower / nb_servers' - if ( - str(PowerCat.SERVERINCHASSIS) - not in power_consumption[str(PowerContext.BMC)] - ): - power_consumption[str(PowerContext.BMC)][ - str(PowerCat.SERVERINCHASSIS) - ] = Power(str(PowerCat.SERVERINCHASSIS)) - power_consumption[str(PowerContext.BMC)][str(PowerCat.SERVERINCHASSIS)].add( - oem_system["Attributes"]["ServerPwr.1.SCViewSledPwr"] + name = str(PowerCat.SERVERINCHASSIS) + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], power_consumption), + 
PowerContext.BMC, + Power(name), + name, + oem_system["Attributes"]["ServerPwr.1.SCViewSledPwr"], ) + if "SC-BMC.1.ChassisInfraPower" in oem_system["Attributes"]: # SC-BMC.1.ChassisInfraPower reports the power consumption of the chassis infrastructure, # not counting the SLEDs - if ( - str(PowerCat.INFRASTRUCTURE) - not in power_consumption[str(PowerContext.BMC)] - ): - power_consumption[str(PowerContext.BMC)][ - str(PowerCat.INFRASTRUCTURE) - ] = Power(str(PowerCat.INFRASTRUCTURE)) - power_consumption[str(PowerContext.BMC)][str(PowerCat.INFRASTRUCTURE)].add( - oem_system["Attributes"]["SC-BMC.1.ChassisInfraPower"] + name = str(PowerCat.INFRASTRUCTURE) + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], power_consumption), + PowerContext.BMC, + Power(name), + name, + oem_system["Attributes"]["SC-BMC.1.ChassisInfraPower"], ) # Let's add the sum of the power supplies to get the inlet power consumption diff --git a/hwbench/environment/vendors/hpe/hpe.py b/hwbench/environment/vendors/hpe/hpe.py index 19c8278..ed6bff4 100644 --- a/hwbench/environment/vendors/hpe/hpe.py +++ b/hwbench/environment/vendors/hpe/hpe.py @@ -1,6 +1,9 @@ import pathlib import re +from typing import cast + from ....bench.monitoring_structs import ( + MonitorMetric, Power, PowerCategories as PowerCat, PowerContext, @@ -28,8 +31,6 @@ def read_thermals( if t["ReadingCelsius"] <= 0: continue pc = t["PhysicalContext"] - if pc not in thermals: - thermals[pc] = {} # Temperature metrics are named like the following : # 05-P1 DIMM 5-8 @@ -48,20 +49,24 @@ def read_thermals( # 04-P1 DIMM 1-4 sd = f"{s}{d}" - def add(name): - if t["Name"] not in thermals[pc]: - thermals[pc][t["Name"]] = Temperature(name) - thermals[pc][t["Name"]].add(t["ReadingCelsius"]) + def add(self, name): + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], thermals), + pc, + Temperature(name), + t["Name"], + t["ReadingCelsius"], + ) # We don't consider all sensors for now # This could be 
updated depending on the needs if s == "CPU": - add(sd) + add(self, sd) elif s == "Inlet": - add(s) + add(self, s) elif d == "DIMM": # P1 DIMM 1-4 - add(f"{s} {d} {de}") + add(self, f"{s} {d} {de}") return thermals def get_power(self): @@ -78,10 +83,12 @@ def read_power_supplies( # Let's update it to have a unique name name = psu["Name"] + str(psu["Oem"]["Hpe"]["BayNumber"]) psu_name = "PS" + str(psu["Oem"]["Hpe"]["BayNumber"]) - if name not in power_supplies[str(PowerContext.BMC)]: - power_supplies[str(PowerContext.BMC)][name] = Power(psu_name) - power_supplies[str(PowerContext.BMC)][name].add( - psu["Oem"]["Hpe"]["AveragePowerOutputWatts"] + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], power_supplies), + PowerContext.BMC, + Power(psu_name), + name, + psu["Oem"]["Hpe"]["AveragePowerOutputWatts"], ) return power_supplies @@ -98,26 +105,31 @@ def read_power_consumption( # But for multi-server chassis, ... if "HPE Apollo2000 Gen10+" in oem_chassis["Name"]: - if str(PowerContext.BMC) not in power_consumption: - power_consumption[str(PowerContext.BMC)] = { - str(PowerCat.SERVER): Power(str(PowerCat.SERVER)), - str(PowerCat.CHASSIS): Power(str(PowerCat.CHASSIS)), - str(PowerCat.SERVERINCHASSIS): Power(str(PowerCat.SERVERINCHASSIS)), - } # type: ignore[no-redef] - # On Apollo2000, the generic PowerConsumedWatts is fact SERVERINCHASSIS - power_consumption[str(PowerContext.BMC)][str(PowerCat.SERVERINCHASSIS)].add( - self.get_power().get("PowerControl")[0]["PowerConsumedWatts"] + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], power_consumption), + PowerContext.BMC, + Power(str(PowerCat.SERVERINCHASSIS)), + str(PowerCat.SERVERINCHASSIS), + self.get_power().get("PowerControl")[0]["PowerConsumedWatts"], ) # And extract SERVER from NodePowerWatts - power_consumption[str(PowerContext.BMC)][str(PowerCat.SERVER)].add( - oem_chassis["Oem"]["Hpe"]["NodePowerWatts"] + super().add_monitoring_value( + cast(dict[str, dict[str, 
MonitorMetric]], power_consumption), + PowerContext.BMC, + Power(str(PowerCat.SERVER)), + str(PowerCat.SERVER), + oem_chassis["Oem"]["Hpe"]["NodePowerWatts"], ) # And CHASSIS from ChassisPowerWatts - power_consumption[str(PowerContext.BMC)][str(PowerCat.CHASSIS)].add( - oem_chassis["Oem"]["Hpe"]["ChassisPowerWatts"] + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], power_consumption), + PowerContext.BMC, + Power(str(PowerCat.CHASSIS)), + str(PowerCat.CHASSIS), + oem_chassis["Oem"]["Hpe"]["ChassisPowerWatts"], ) return power_consumption diff --git a/hwbench/environment/vendors/mock.py b/hwbench/environment/vendors/mock.py index 5ba01f5..ea6e8e8 100644 --- a/hwbench/environment/vendors/mock.py +++ b/hwbench/environment/vendors/mock.py @@ -1,3 +1,4 @@ +from typing import cast from ...bench.monitoring_structs import ( FanContext, MonitorMetric, @@ -19,12 +20,14 @@ def read_thermals( ) -> dict[str, dict[str, Temperature]]: # Let's add a faked thermal metric name = "CPU1" - if str(ThermalContext.CPU) not in thermals: - thermals[str(ThermalContext.CPU)] = {} - if name not in thermals[str(ThermalContext.CPU)]: - thermals[str(ThermalContext.CPU)][name] = Temperature(name) - thermals[str(ThermalContext.CPU)][name].add(40) + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], thermals), + ThermalContext.CPU, + Temperature(name), + name, + 40, + ) return thermals def read_fans( @@ -32,12 +35,13 @@ def read_fans( ) -> dict[str, dict[str, MonitorMetric]]: # Let's add a faked fans metric name = "Fan1" - if str(FanContext.FAN) not in fans: - fans[str(FanContext.FAN)] = {} - if name not in fans[str(FanContext.FAN)]: - fans[str(FanContext.FAN)][name] = MonitorMetric(name, "RPM") - - fans[str(FanContext.FAN)][name].add(40) + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], fans), + FanContext.FAN, + MonitorMetric(name, "RPM"), + name, + 40, + ) return fans def read_power_consumption( @@ -45,15 +49,12 @@ def 
read_power_consumption( ) -> dict[str, dict[str, Power]]: # Let's add a faked power metric name = str(PowerCategories.CHASSIS) - if str(PowerContext.BMC) not in power_consumption: - power_consumption[str(PowerContext.BMC)] = {} - if name not in power_consumption[str(PowerContext.BMC)]: - power_consumption[str(PowerContext.BMC)][name] = Power( - str(PowerCategories.CHASSIS) - ) - - power_consumption[str(PowerContext.BMC)][str(PowerCategories.CHASSIS)].add( - 125.0 + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], power_consumption), + PowerContext.BMC, + Power(name), + name, + 125.0, ) return power_consumption @@ -63,11 +64,13 @@ def read_power_supplies( # Let's add a faked power supplies status = "PS1 status" name = "PS1" - if str(PowerContext.BMC) not in power_supplies: - power_supplies[str(PowerContext.BMC)] = {} - if status not in power_supplies[str(PowerContext.BMC)]: - power_supplies[str(PowerContext.BMC)][status] = Power(name) - power_supplies[str(PowerContext.BMC)][status].add(125) + super().add_monitoring_value( + cast(dict[str, dict[str, MonitorMetric]], power_supplies), + PowerContext.BMC, + Power(name), + status, + 125, + ) return power_supplies def connect_redfish(self): diff --git a/hwbench/environment/vendors/vendor.py b/hwbench/environment/vendors/vendor.py index 087773b..50f2c87 100644 --- a/hwbench/environment/vendors/vendor.py +++ b/hwbench/environment/vendors/vendor.py @@ -6,6 +6,7 @@ import pathlib import redfish # type: ignore from abc import ABC, abstractmethod +from typing import Any from ...utils import helpers as h from ...utils.external import External from ...bench.monitoring_structs import ( @@ -31,6 +32,22 @@ def __del__(self): if self.logged: self.redfish_obj.logout() + def add_monitoring_value( + self, + monitoring_struct: dict[str, dict[str, MonitorMetric]], + context: Any, + metric: MonitorMetric, + name: str, + value: float, + ) -> dict[str, dict[str, MonitorMetric]]: + """This function adds a new value to the 
monitoring data structure.""" + if str(context) not in monitoring_struct: + monitoring_struct[str(context)] = {} + if name not in monitoring_struct[str(context)]: + monitoring_struct[str(context)][name] = metric + monitoring_struct[str(context)][name].add(value) + return monitoring_struct + def run_cmd(self) -> list[str]: return ["ipmitool", "lan", "print"] From 041c942218c5391f38ede5bb8317cc6de48576ea Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Wed, 5 Jun 2024 10:29:08 +0200 Subject: [PATCH 4/4] hwbench/configs: Adding mini.conf job This job is very useful to make a quick test on a host. It can be used to validate a fresh install or even during development phases. Signed-off-by: Erwan Velu --- configs/mini.conf | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 configs/mini.conf diff --git a/configs/mini.conf b/configs/mini.conf new file mode 100644 index 0000000..8cda4f9 --- /dev/null +++ b/configs/mini.conf @@ -0,0 +1,14 @@ +# This configuration will : +# - load all cores with a matrixprod test during 15 sec. +[global] +runtime=15 +monitor=all + +[full_cpu_load] +engine=stressng +engine_module=cpu +engine_module_parameter=matrixprod +hosting_cpu_cores=all +hosting_cpu_cores_scaling=none +stressor_range=auto +