Commit

task:4171696 - Log analyzer - Adding ruff to the CI (#282)
* for-now

* ruff-ci

* Making ruff and pylint parallel
boazhaim authored Nov 20, 2024
1 parent 6da3ca8 commit 22cf5b8
Showing 23 changed files with 602 additions and 370 deletions.
23 changes: 21 additions & 2 deletions .github/workflows/ufm_log_analyzer_ci_workflow.yml
@@ -28,9 +28,28 @@ jobs:
          cd $SCRIPT_DIR
          # Install dependencies
          pip install -r src/loganalyze/requirements.txt
          pip install pylint==3.2.6
          # Run PyLint
          pylint --rcfile=src/loganalyze/.pylintrc src/loganalyze
  ruff:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@main

      - name: Set up Python
        uses: actions/setup-python@main
        with:
          python-version: 3.9

      - name: Install and run Ruff
        run: |
          SCRIPT_DIR="plugins/ufm_log_analyzer_plugin"
          cd $SCRIPT_DIR
          pip install ruff
          ruff format --diff --check src/loganalyze
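For reference, the check the new ruff job performs can be reproduced locally before pushing. A minimal sketch (not part of the commit), assuming ruff is already installed in the active environment and the repository root is the current working directory:

# Local sketch of the CI "ruff" job above; it mirrors the workflow step and does not rewrite files.
import subprocess
import sys

SCRIPT_DIR = "plugins/ufm_log_analyzer_plugin"  # same path the workflow uses

result = subprocess.run(
    ["ruff", "format", "--diff", "--check", "src/loganalyze"],
    cwd=SCRIPT_DIR,
)
sys.exit(result.returncode)  # non-zero means ruff would reformat something, as in CI

Defining ruff as its own job, rather than adding it to the pylint job, is what lets the two linters run in parallel on separate runners.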
232 changes: 138 additions & 94 deletions plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py

Large diffs are not rendered by default.

@@ -27,13 +27,17 @@

from loganalyze.log_analyzers.constants import DataConstants
import loganalyze.logger as log

# This makes sure the user does not see the warning from plotting
logging.getLogger('matplotlib').setLevel(logging.ERROR)
matplotlib.use('Agg') # This allows to run the tool on servers without graphic card/headless
logging.getLogger("matplotlib").setLevel(logging.ERROR)
matplotlib.use(
"Agg"
) # This allows to run the tool on servers without graphic card/headless

pd.set_option("display.max_colwidth", None)
warnings.filterwarnings("ignore")


class BaseImageCreator:
# Setting the graph time interval to 1 hour
# This is out side of the constructor since
@@ -89,11 +93,12 @@ def _save_data_based_on_timestamp(
bbox={"facecolor": "white", "alpha": 0.5},
)


generic_file_name = f"{title}".replace(" ", "_").replace("/", "_")
images_created = []
for img_type in self._images_type:
cur_img = os.path.join(self._dest_image_path,f"{generic_file_name}.{img_type}")
cur_img = os.path.join(
self._dest_image_path, f"{generic_file_name}.{img_type}"
)
log.LOGGER.debug(f"Saving {cur_img}")
plt.savefig(cur_img, format=img_type)
images_created.append(cur_img)
@@ -119,7 +124,9 @@ def _save_pivot_data_in_bars(
generic_file_name = f"{title}".replace(" ", "_").replace("/", "_")
images_created = []
for img_type in self._images_type:
cur_img = os.path.join(self._dest_image_path,f"{generic_file_name}.{img_type}")
cur_img = os.path.join(
self._dest_image_path, f"{generic_file_name}.{img_type}"
)
log.LOGGER.debug(f"Saving {cur_img}")
plt.savefig(cur_img, format=img_type)
images_created.append(cur_img)
@@ -137,14 +144,16 @@ def full_analysis(self):
# In case a function is raising an exception.
try:
func()
except: # pylint: disable=bare-except
except: # pylint: disable=bare-except
function_name = func.__name__
try:
class_name = ""
if "." in func.__qualname__:
class_name = func.__qualname__.split('.')[0]
log.LOGGER.debug(f"Error when calling {function_name} {class_name}, skipping")
except: # pylint: disable=bare-except
class_name = func.__qualname__.split(".")[0]
log.LOGGER.debug(
f"Error when calling {function_name} {class_name}, skipping"
)
except: # pylint: disable=bare-except
pass

return self._images_created if len(self._images_created) > 0 else []
@@ -156,14 +165,12 @@ class BaseAnalyzer(BaseImageCreator):
ability to print/save images and filter data
"""


def __init__(
self,
logs_csvs: List[str],
hours: int,
dest_image_path: str,
sort_timestamp=True

sort_timestamp=True,
):
super().__init__(dest_image_path)
dataframes = [pd.read_csv(ufm_log) for ufm_log in logs_csvs]
@@ -175,8 +182,9 @@ def __init__(
# Filter logs to include only those within the last 'hours' from the max timestamp
filtered_logs = df[df[DataConstants.TIMESTAMP] >= start_time]
data_sorted = filtered_logs.sort_values(by=DataConstants.TIMESTAMP)
data_sorted[DataConstants.AGGREGATIONTIME] = \
data_sorted[DataConstants.TIMESTAMP].dt.floor(self.time_interval)
data_sorted[DataConstants.AGGREGATIONTIME] = data_sorted[
DataConstants.TIMESTAMP
].dt.floor(self.time_interval)
self._log_data_sorted = data_sorted
else:
self._log_data_sorted = df
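The BaseAnalyzer changes above are formatting-only; the logic still filters the logs to the last 'hours' from the latest timestamp and buckets them with dt.floor. A small, self-contained illustration of that pattern on invented data (the 1-hour interval and the start_time computation are assumptions for the example; the column names follow DataConstants):

# Toy illustration of the timestamp filtering + dt.floor bucketing used in BaseAnalyzer.
import pandas as pd

df = pd.DataFrame(
    {
        "timestamp": pd.to_datetime(
            ["2024-11-20 10:05", "2024-11-20 10:50", "2024-11-20 12:20"]
        ),
        "event": ["Link is up", "Link went down", "Link is up"],
    }
)

hours = 24
start_time = df["timestamp"].max() - pd.Timedelta(hours=hours)
filtered = df[df["timestamp"] >= start_time].sort_values(by="timestamp").copy()
filtered["aggregated_by_time"] = filtered["timestamp"].dt.floor("1h")  # assumed 1h interval
print(filtered.groupby("aggregated_by_time").size())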
@@ -186,7 +194,9 @@ def __init__(
def _remove_empty_lines_from_csv(input_file):
temp_file = input_file + ".temp"

with open(input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING) as infile, open(
with open(
input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
) as infile, open(
temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
reader = csv.reader(infile)
@@ -212,10 +222,11 @@ def fix_lines_with_no_timestamp(csvs):
temp_file = csv_file + ".temp"
BaseAnalyzer._remove_empty_lines_from_csv(csv_file)
fixed_lines = 0
with open(csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING) \
as infile, open(
temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
with open(
csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
) as infile, open(
temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
reader = csv.reader(infile)
writer = csv.writer(outfile)
current_line = None
@@ -43,14 +43,11 @@ def _extract_ufm_version(logs_csvs):
temp_file = csv_file + ".temp"

# Open the input CSV file for reading
with open(csv_file,
mode='r',
newline='',
encoding=DataConstants.UTF8ENCODING) as infile, \
open(temp_file,
mode='w',
newline='',
encoding=DataConstants.UTF8ENCODING) as outfile:
with open(
csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
) as infile, open(
temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
reader = csv.DictReader(infile)
fieldnames = reader.fieldnames # Get the header from the CSV
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
@@ -60,10 +57,10 @@ def _extract_ufm_version(logs_csvs):

# Iterate through each row in the input CSV
for row in reader:
if row['type'] == 'ufm_version':
if row["type"] == "ufm_version":
# If the type is 'ufm_version',
# save the row and don't write it to the new file
ufm_versions.add(row['data'])
ufm_versions.add(row["data"])
else:
# Write the row to the new CSV file
writer.writerow(row)
@@ -10,6 +10,7 @@
# provided with the software product.
#


class DataConstants:
AGGREGATIONTIME = "aggregated_by_time"
TIMESTAMP = "timestamp"
@@ -24,9 +24,11 @@ class EventsLogAnalyzer(BaseAnalyzer):
def __init__(self, logs_csvs: List[str], hours: int, dest_image_path):
super().__init__(logs_csvs, hours, dest_image_path)
self._supported_log_levels = ["CRITICAL", "WARNING", "INFO", "MINOR"]
self._funcs_for_analysis = {self.plot_critical_events_per_aggregation_time,
self.plot_link_up_down_count_per_aggregation_time,
self.plot_top_n_critical_events_over_time}
self._funcs_for_analysis = {
self.plot_critical_events_per_aggregation_time,
self.plot_link_up_down_count_per_aggregation_time,
self.plot_top_n_critical_events_over_time,
}

# Function to split "object_id" into "device" and "description"
def _split_switch_object_id(self, row):
@@ -48,16 +50,20 @@ def get_events_by_log_level(self, log_level="CRITICAL"):

def plot_top_n_critical_events_over_time(self, n=10):
critical_events = self.get_events_by_log_level("CRITICAL")
total_critical_events = critical_events.groupby("event").size().reset_index(name="count")
total_critical_events = (
critical_events.groupby("event").size().reset_index(name="count")
)

# Get the top n events with the highest count overall
top_n_events = total_critical_events.nlargest(n, 'count')
top_n_events = total_critical_events.nlargest(n, "count")

# Group the top 5 events by time interval
critical_events_grouped_by_time = \
critical_events[critical_events["event"].isin(top_n_events["event"])]\
.groupby([DataConstants.AGGREGATIONTIME, "event"])\
.size().reset_index(name="count")
critical_events_grouped_by_time = (
critical_events[critical_events["event"].isin(top_n_events["event"])]
.groupby([DataConstants.AGGREGATIONTIME, "event"])
.size()
.reset_index(name="count")
)

pivot_top_n_events_by_hour = critical_events_grouped_by_time.pivot(
index=DataConstants.AGGREGATIONTIME, columns="event", values="count"
@@ -78,38 +84,42 @@ def get_critical_event_bursts(self, n=5):
critical_events = self.get_events_by_log_level("CRITICAL")

# Round timestamps to the nearest minute
critical_events['minute'] = critical_events['timestamp'].dt.floor('T')
critical_events["minute"] = critical_events["timestamp"].dt.floor("T")

# Group by minute and event type, then count the number of events in each group
event_counts = (critical_events
.groupby(['minute', 'event', 'event_type'])
.size()
.reset_index(name='count'))
event_counts = (
critical_events.groupby(["minute", "event", "event_type"])
.size()
.reset_index(name="count")
)

# Filter for bursts where the count exceeds or equals 'n'
bursts = event_counts[event_counts['count'] >= n]
bursts = event_counts[event_counts["count"] >= n]

# Create a Series with 'minute' as index and 'count' as values
bursts_series = bursts.set_index('minute')['count']
bursts_series = bursts.set_index("minute")["count"]

# Save the plot using the series
self._save_data_based_on_timestamp(
bursts_series, # Pass the Series instead of separate lists
"Time",
"Number of Critical Events in the burst",
"Critical Event Bursts"
"Critical Event Bursts",
)

# Convert the result to a list of dictionaries for returning
burst_list = bursts.rename(columns={'minute': 'timestamp'}).to_dict(orient='records')
burst_list = bursts.rename(columns={"minute": "timestamp"}).to_dict(
orient="records"
)

return burst_list

def plot_critical_events_per_aggregation_time(self):
critical_events = self.get_events_by_log_level("CRITICAL")
critical_events_grouped_by_time = (
critical_events.groupby([DataConstants.AGGREGATIONTIME, "event"])\
.size().reset_index(name="count")
critical_events.groupby([DataConstants.AGGREGATIONTIME, "event"])
.size()
.reset_index(name="count")
)

pivot_critical_events_by_hour = critical_events_grouped_by_time.pivot(
@@ -124,15 +134,14 @@ def plot_critical_events_per_aggregation_time(self):
"Events",
)



def plot_link_up_down_count_per_aggregation_time(self):
links_events = self._log_data_sorted[
(self._log_data_sorted["event"] == "Link is up")
|
(self._log_data_sorted["event"] == "Link went down")
| (self._log_data_sorted["event"] == "Link went down")
]
grouped_links_events = links_events.groupby([DataConstants.AGGREGATIONTIME, "event"])
grouped_links_events = links_events.groupby(
[DataConstants.AGGREGATIONTIME, "event"]
)
counted_links_events_by_time = grouped_links_events.size().reset_index(
name="count"
)
@@ -141,9 +150,5 @@ def plot_link_up_down_count_per_aggregation_time(self):
index=DataConstants.AGGREGATIONTIME, columns="event", values="count"
).fillna(0)
self._save_pivot_data_in_bars(
pivot_links_data,
"Time",
"Number of Events",
"Link up/down events",
"Event"
pivot_links_data, "Time", "Number of Events", "Link up/down events", "Event"
)
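To make the burst logic from get_critical_event_bursts (earlier in this file's diff) concrete, here is a toy end-to-end run of the same idiom on invented data; "min" is used as the newer spelling of the "T" minute alias that appears in the diff:

# Toy illustration of the burst-detection idiom: floor timestamps to the minute,
# count events per (minute, event, event_type), keep groups whose count >= n.
import pandas as pd

events = pd.DataFrame(
    {
        "timestamp": pd.to_datetime(
            [
                "2024-11-20 10:00:05",
                "2024-11-20 10:00:20",
                "2024-11-20 10:00:40",
                "2024-11-20 10:03:00",
            ]
        ),
        "event": ["Switch is down"] * 4,      # invented event name
        "event_type": ["HARDWARE"] * 4,       # invented event type
    }
)

n = 3
events["minute"] = events["timestamp"].dt.floor("min")
counts = (
    events.groupby(["minute", "event", "event_type"])
    .size()
    .reset_index(name="count")
)
bursts = counts[counts["count"] >= n]
print(bursts.rename(columns={"minute": "timestamp"}).to_dict(orient="records"))
# -> one burst record for the 10:00 minute (3 events); the lone 10:03 event is ignored.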
