diff --git a/plugins/pdr_deterministic_plugin/README.md b/plugins/pdr_deterministic_plugin/README.md index 3cbe60da0..6421f2caa 100644 --- a/plugins/pdr_deterministic_plugin/README.md +++ b/plugins/pdr_deterministic_plugin/README.md @@ -66,6 +66,9 @@ The following parameters are configurable via the plugin's configuration file. ( | DEISOLATE_CONSIDER_TIME | consideration time for port deisolation (in minutes) | 5 | | DO_DEISOLATION | if set to false, the plugin will not perform deisolation | True | | DYNAMIC_WAIT_TIME | Seconds to wait for the dynamic telemetry session to respond | 30 | +| MAX_PORT_RESET_NUM | max number of resets per port | 2 | +| PORT_RESET_INTERVAL_SECONDS | time interval in seconds after last reset that zeroes reset counter (reenables resets) | 604800 (1 week in seconds) | + ## Calculating BER Counters diff --git a/plugins/pdr_deterministic_plugin/build/config/pdr_deterministic.conf b/plugins/pdr_deterministic_plugin/build/config/pdr_deterministic.conf index a4535b2ce..b22fe6c97 100644 --- a/plugins/pdr_deterministic_plugin/build/config/pdr_deterministic.conf +++ b/plugins/pdr_deterministic_plugin/build/config/pdr_deterministic.conf @@ -28,6 +28,12 @@ AUTOMATIC_DEISOLATE=True # if set to false, the plugin will not perform deisolation DO_DEISOLATION=True +[Reset] +# max number of resets per port +MAX_PORT_RESET_NUM=2 +# time interval in seconds after last reset that zeroes reset counter (reenables resets), the default is 1 week in seconds +PORT_RESET_INTERVAL_SECONDS=604800 + [Metrics] # in Celsius TMAX=70 diff --git a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/constants.py b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/constants.py index 82a1053cb..6f6342a8f 100644 --- a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/constants.py +++ b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/constants.py @@ -28,8 +28,11 @@ class PDRConstants(): CONF_SAMPLING = "Sampling" CONF_ISOLATION = "Isolation" CONF_METRICS = "Metrics" + 
CONF_RESET = "Reset" INTERVAL = "INTERVAL" MAX_NUM_ISOLATE = "MAX_NUM_ISOLATE" + MAX_PORT_RESET_NUM = "MAX_PORT_RESET_NUM" + PORT_RESET_INTERVAL_SECONDS = "PORT_RESET_INTERVAL_SECONDS" TMAX = "TMAX" D_TMAX = "D_TMAX" MAX_PDR = "MAX_PDR" @@ -53,6 +56,7 @@ class PDRConstants(): GET_ISOLATED_PORTS = "/resources/isolated_ports" GET_PORTS_REST = "/resources/ports" GET_ACTIVE_PORTS_REST = "/resources/ports?active=true" + POST_ACTIONS_REST = "/actions" API_HEALTHY_PORTS = "healthy_ports" API_ISOLATED_PORTS = "isolated_ports" diff --git a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/isolation_mgr.py b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/isolation_mgr.py index 6a9e8cd55..689ffe088 100644 --- a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/isolation_mgr.py +++ b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/isolation_mgr.py @@ -9,6 +9,7 @@ # This software product is governed by the End User License Agreement # provided with the software product. # +from datetime import datetime, timedelta import traceback import time import http @@ -22,6 +23,18 @@ from ufm_communication_mgr import UFMCommunicator from data_store import DataStore +class PortReset: + """ + Represents the port reset info. + """ + def __init__(self, port_name): + """ + Initialize a new instance of the PortReset class. 
+ """ + self.port_name = port_name + self.reset_time = datetime.min + self.reset_count = 0 + #pylint: disable=too-many-instance-attributes class IsolationMgr: ''' @@ -34,6 +47,8 @@ def __init__(self, ufm_client: UFMCommunicator, logger): self.isolated_ports = {} # {port_name: telemetry_data} self.ports_data = {} + # {port_name: PortReset} + self.ports_resets = {} self.ufm_latest_isolation_state = [] pdr_config = configparser.ConfigParser() @@ -46,6 +61,10 @@ def __init__(self, ufm_client: UFMCommunicator, logger): self.do_deisolate = pdr_config.getboolean(Constants.CONF_ISOLATION, Constants.DO_DEISOLATION) self.switch_hca_isolation = pdr_config.getboolean(Constants.CONF_ISOLATION, Constants.SWITCH_TO_HOST_ISOLATION) self.test_mode = pdr_config.getboolean(Constants.CONF_COMMON, Constants.TEST_MODE, fallback=False) + self.max_port_reset_num = pdr_config.getint(Constants.CONF_RESET, Constants.MAX_PORT_RESET_NUM, fallback=2) + self.port_reset_interval = timedelta(seconds=pdr_config.getint(Constants.CONF_RESET, + Constants.PORT_RESET_INTERVAL_SECONDS, + fallback=7*24*3600)) # default is 1 week in seconds self.test_iteration = 0 self.logger = logger @@ -83,10 +102,13 @@ def eval_isolation(self, port_name, cause): return if not self.dry_run: + # Isolate port ret = self.ufm_client.isolate_port(port_name) if not ret or ret.status_code != http.HTTPStatus.OK: self.logger.warning("Failed isolating port: %s with cause: %s... 
status_code= %s", port_name, cause, ret.status_code) return + # Reset port + self.reset_port(port_name, port_obj.port_guid) isolated_port = self.isolated_ports.get(port_name) if not isolated_port: self.isolated_ports[port_name] = IsolatedPort(port_name) @@ -97,6 +119,36 @@ def eval_isolation(self, port_name, cause): if not self.test_mode: self.ufm_client.send_event(log_message, event_id=Constants.EXTERNAL_EVENT_ALERT, external_event_name="Isolating Port") + def reset_port(self, port_name, port_guid): + """ + Reset port if reset limit is not exceeded + """ + # Check if reset is allowed + reset_history = self.ports_resets.get(port_name) + if reset_history: + if datetime.now() - reset_history.reset_time > self.port_reset_interval: + # Passed too much time from last reset: clean reset history and allow resets + del self.ports_resets[port_name] + elif reset_history.reset_count >= self.max_port_reset_num: + # Exceeds reset limit + self.logger.info("Skipping reset of port: %s... reset limit exceeded", port_name) + return + + # Perform reset + ret = self.ufm_client.reset_port(port_name, port_guid) + if not ret or ret.status_code != http.HTTPStatus.OK: + self.logger.warning("Failed resetting port: %s... status_code= %s", port_name, ret.status_code) + return + + # Update port resets history + reset_history = self.ports_resets.get(port_name) + if not reset_history: + reset_history = PortReset(port_name) + self.ports_resets[port_name] = reset_history + + reset_history.reset_count += 1 + reset_history.reset_time = datetime.now() + self.logger.info("Resetting port: %s... 
reset_count= %s", port_name, reset_history.reset_count) def eval_deisolate(self, port_name): """ diff --git a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/pdr_algorithm.py b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/pdr_algorithm.py index 6ecff06db..f18e9e7f2 100644 --- a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/pdr_algorithm.py +++ b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/pdr_algorithm.py @@ -433,7 +433,7 @@ def analyze_telemetry_data(self, ports_data, ports_counters): issues[port_name] = ber_issue # If out of operating conditions we'll overwrite the cause if self.temp_check and self.is_out_of_operating_conf(port_name): - issues[port_name].cause = Constants.ISSUE_OONOC + issues[port_name] = Issue(port_obj.port_name, Constants.ISSUE_OONOC) return list(issues.values()) def check_deisolation_conditions(self, isolated_port): diff --git a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/ufm_communication_mgr.py b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/ufm_communication_mgr.py index f4e8fcaf0..27d8f581b 100644 --- a/plugins/pdr_deterministic_plugin/ufm_sim_web_service/ufm_communication_mgr.py +++ b/plugins/pdr_deterministic_plugin/ufm_sim_web_service/ufm_communication_mgr.py @@ -104,3 +104,18 @@ def get_ports_metadata(self): def get_port_metadata(self, port_name): return self.get_request(f"{Constants.GET_PORTS_REST}/ {port_name}") + + def reset_port(self, port_name, port_guid): + """ + Reset port + """ + # using isolation UFM REST API - POST /ufmRestV2/actions + data = { + "params": { "port_id": port_name }, + "action": "reset", + "object_ids": [ port_guid ], + "object_type": "System", + "description": "", + "identifier": "id" + } + return self.send_request(Constants.POST_ACTIONS_REST, data, method=Constants.POST_METHOD)