Commit

task:4171696 - Log analyzer - Adding ruff to the CI (#282)
* for-now

* ruff-ci

* Making ruff and pylint parallel
boazhaim authored Nov 20, 2024
1 parent 6da3ca8 commit 22cf5b8
Showing 23 changed files with 602 additions and 370 deletions.
23 changes: 21 additions & 2 deletions .github/workflows/ufm_log_analyzer_ci_workflow.yml
@@ -28,9 +28,28 @@ jobs:
          cd $SCRIPT_DIR
          # Install dependencies
          pip install -r src/loganalyze/requirements.txt
          pip install pylint==3.2.6
          # Run PyLint
          pylint --rcfile=src/loganalyze/.pylintrc src/loganalyze
  ruff:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@main

      - name: Set up Python
        uses: actions/setup-python@main
        with:
          python-version: 3.9

      - name: Install and run Ruff
        run: |
          SCRIPT_DIR="plugins/ufm_log_analyzer_plugin"
          cd $SCRIPT_DIR
          pip install ruff
          ruff format --diff --check src/loganalyze
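For reference, the check the new ruff job performs can be reproduced locally before pushing. A minimal sketch (not part of the commit), assuming ruff is already installed in the active environment and the repository root is the current working directory:

# Local sketch of the CI "ruff" job above; it mirrors the workflow step and does not rewrite files.
import subprocess
import sys

SCRIPT_DIR = "plugins/ufm_log_analyzer_plugin"  # same path the workflow uses

result = subprocess.run(
    ["ruff", "format", "--diff", "--check", "src/loganalyze"],
    cwd=SCRIPT_DIR,
)
sys.exit(result.returncode)  # non-zero means ruff would reformat something, as in CI

Defining ruff as its own job, rather than adding it to the pylint job, is what lets the two linters run in parallel on separate runners.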
232 changes: 138 additions & 94 deletions plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py

Large diffs are not rendered by default.

@@ -27,13 +27,17 @@

from loganalyze.log_analyzers.constants import DataConstants
import loganalyze.logger as log

# This makes sure the user does not see the warning from plotting
logging.getLogger('matplotlib').setLevel(logging.ERROR)
matplotlib.use('Agg') # This allows to run the tool on servers without graphic card/headless
logging.getLogger("matplotlib").setLevel(logging.ERROR)
matplotlib.use(
"Agg"
) # This allows to run the tool on servers without graphic card/headless

pd.set_option("display.max_colwidth", None)
warnings.filterwarnings("ignore")


class BaseImageCreator:
# Setting the graph time interval to 1 hour
# This is out side of the constructor since
@@ -89,11 +93,12 @@ def _save_data_based_on_timestamp(
bbox={"facecolor": "white", "alpha": 0.5},
)


generic_file_name = f"{title}".replace(" ", "_").replace("/", "_")
images_created = []
for img_type in self._images_type:
cur_img = os.path.join(self._dest_image_path,f"{generic_file_name}.{img_type}")
cur_img = os.path.join(
self._dest_image_path, f"{generic_file_name}.{img_type}"
)
log.LOGGER.debug(f"Saving {cur_img}")
plt.savefig(cur_img, format=img_type)
images_created.append(cur_img)
@@ -119,7 +124,9 @@ def _save_pivot_data_in_bars(
generic_file_name = f"{title}".replace(" ", "_").replace("/", "_")
images_created = []
for img_type in self._images_type:
cur_img = os.path.join(self._dest_image_path,f"{generic_file_name}.{img_type}")
cur_img = os.path.join(
self._dest_image_path, f"{generic_file_name}.{img_type}"
)
log.LOGGER.debug(f"Saving {cur_img}")
plt.savefig(cur_img, format=img_type)
images_created.append(cur_img)
@@ -137,14 +144,16 @@ def full_analysis(self):
# In case a function is raising an exception.
try:
func()
except: # pylint: disable=bare-except
except: # pylint: disable=bare-except
function_name = func.__name__
try:
class_name = ""
if "." in func.__qualname__:
class_name = func.__qualname__.split('.')[0]
log.LOGGER.debug(f"Error when calling {function_name} {class_name}, skipping")
except: # pylint: disable=bare-except
class_name = func.__qualname__.split(".")[0]
log.LOGGER.debug(
f"Error when calling {function_name} {class_name}, skipping"
)
except: # pylint: disable=bare-except
pass

return self._images_created if len(self._images_created) > 0 else []
@@ -156,14 +165,12 @@ class BaseAnalyzer(BaseImageCreator):
ability to print/save images and filter data
"""


def __init__(
self,
logs_csvs: List[str],
hours: int,
dest_image_path: str,
sort_timestamp=True

sort_timestamp=True,
):
super().__init__(dest_image_path)
dataframes = [pd.read_csv(ufm_log) for ufm_log in logs_csvs]
@@ -175,8 +182,9 @@ def __init__(
# Filter logs to include only those within the last 'hours' from the max timestamp
filtered_logs = df[df[DataConstants.TIMESTAMP] >= start_time]
data_sorted = filtered_logs.sort_values(by=DataConstants.TIMESTAMP)
data_sorted[DataConstants.AGGREGATIONTIME] = \
data_sorted[DataConstants.TIMESTAMP].dt.floor(self.time_interval)
data_sorted[DataConstants.AGGREGATIONTIME] = data_sorted[
DataConstants.TIMESTAMP
].dt.floor(self.time_interval)
self._log_data_sorted = data_sorted
else:
self._log_data_sorted = df
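The BaseAnalyzer changes above are formatting-only; the logic still filters the logs to the last 'hours' from the latest timestamp and buckets them with dt.floor. A small, self-contained illustration of that pattern on invented data (the 1-hour interval and the start_time computation are assumptions for the example; the column names follow DataConstants):

# Toy illustration of the timestamp filtering + dt.floor bucketing used in BaseAnalyzer.
import pandas as pd

df = pd.DataFrame(
    {
        "timestamp": pd.to_datetime(
            ["2024-11-20 10:05", "2024-11-20 10:50", "2024-11-20 12:20"]
        ),
        "event": ["Link is up", "Link went down", "Link is up"],
    }
)

hours = 24
start_time = df["timestamp"].max() - pd.Timedelta(hours=hours)
filtered = df[df["timestamp"] >= start_time].sort_values(by="timestamp").copy()
filtered["aggregated_by_time"] = filtered["timestamp"].dt.floor("1h")  # assumed 1h interval
print(filtered.groupby("aggregated_by_time").size())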
@@ -186,7 +194,9 @@ def __init__(
def _remove_empty_lines_from_csv(input_file):
temp_file = input_file + ".temp"

with open(input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING) as infile, open(
with open(
input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
) as infile, open(
temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
reader = csv.reader(infile)
@@ -212,10 +222,11 @@ def fix_lines_with_no_timestamp(csvs):
temp_file = csv_file + ".temp"
BaseAnalyzer._remove_empty_lines_from_csv(csv_file)
fixed_lines = 0
with open(csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING) \
as infile, open(
temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
with open(
csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
) as infile, open(
temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
reader = csv.reader(infile)
writer = csv.writer(outfile)
current_line = None
@@ -43,14 +43,11 @@ def _extract_ufm_version(logs_csvs):
temp_file = csv_file + ".temp"

# Open the input CSV file for reading
with open(csv_file,
mode='r',
newline='',
encoding=DataConstants.UTF8ENCODING) as infile, \
open(temp_file,
mode='w',
newline='',
encoding=DataConstants.UTF8ENCODING) as outfile:
with open(
csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
) as infile, open(
temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
) as outfile:
reader = csv.DictReader(infile)
fieldnames = reader.fieldnames # Get the header from the CSV
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
@@ -60,10 +57,10 @@ def _extract_ufm_version(logs_csvs):

# Iterate through each row in the input CSV
for row in reader:
if row['type'] == 'ufm_version':
if row["type"] == "ufm_version":
# If the type is 'ufm_version',
# save the row and don't write it to the new file
ufm_versions.add(row['data'])
ufm_versions.add(row["data"])
else:
# Write the row to the new CSV file
writer.writerow(row)
@@ -10,6 +10,7 @@
# provided with the software product.
#


class DataConstants:
AGGREGATIONTIME = "aggregated_by_time"
TIMESTAMP = "timestamp"
@@ -24,9 +24,11 @@ class EventsLogAnalyzer(BaseAnalyzer):
def __init__(self, logs_csvs: List[str], hours: int, dest_image_path):
super().__init__(logs_csvs, hours, dest_image_path)
self._supported_log_levels = ["CRITICAL", "WARNING", "INFO", "MINOR"]
self._funcs_for_analysis = {self.plot_critical_events_per_aggregation_time,
self.plot_link_up_down_count_per_aggregation_time,
self.plot_top_n_critical_events_over_time}
self._funcs_for_analysis = {
self.plot_critical_events_per_aggregation_time,
self.plot_link_up_down_count_per_aggregation_time,
self.plot_top_n_critical_events_over_time,
}

# Function to split "object_id" into "device" and "description"
def _split_switch_object_id(self, row):
@@ -48,16 +50,20 @@ def get_events_by_log_level(self, log_level="CRITICAL"):

def plot_top_n_critical_events_over_time(self, n=10):
critical_events = self.get_events_by_log_level("CRITICAL")
total_critical_events = critical_events.groupby("event").size().reset_index(name="count")
total_critical_events = (
critical_events.groupby("event").size().reset_index(name="count")
)

# Get the top n events with the highest count overall
top_n_events = total_critical_events.nlargest(n, 'count')
top_n_events = total_critical_events.nlargest(n, "count")

# Group the top 5 events by time interval
critical_events_grouped_by_time = \
critical_events[critical_events["event"].isin(top_n_events["event"])]\
.groupby([DataConstants.AGGREGATIONTIME, "event"])\
.size().reset_index(name="count")
critical_events_grouped_by_time = (
critical_events[critical_events["event"].isin(top_n_events["event"])]
.groupby([DataConstants.AGGREGATIONTIME, "event"])
.size()
.reset_index(name="count")
)

pivot_top_n_events_by_hour = critical_events_grouped_by_time.pivot(
index=DataConstants.AGGREGATIONTIME, columns="event", values="count"
@@ -78,38 +84,42 @@ def get_critical_event_bursts(self, n=5):
critical_events = self.get_events_by_log_level("CRITICAL")

# Round timestamps to the nearest minute
critical_events['minute'] = critical_events['timestamp'].dt.floor('T')
critical_events["minute"] = critical_events["timestamp"].dt.floor("T")

# Group by minute and event type, then count the number of events in each group
event_counts = (critical_events
.groupby(['minute', 'event', 'event_type'])
.size()
.reset_index(name='count'))
event_counts = (
critical_events.groupby(["minute", "event", "event_type"])
.size()
.reset_index(name="count")
)

# Filter for bursts where the count exceeds or equals 'n'
bursts = event_counts[event_counts['count'] >= n]
bursts = event_counts[event_counts["count"] >= n]

# Create a Series with 'minute' as index and 'count' as values
bursts_series = bursts.set_index('minute')['count']
bursts_series = bursts.set_index("minute")["count"]

# Save the plot using the series
self._save_data_based_on_timestamp(
bursts_series, # Pass the Series instead of separate lists
"Time",
"Number of Critical Events in the burst",
"Critical Event Bursts"
"Critical Event Bursts",
)

# Convert the result to a list of dictionaries for returning
burst_list = bursts.rename(columns={'minute': 'timestamp'}).to_dict(orient='records')
burst_list = bursts.rename(columns={"minute": "timestamp"}).to_dict(
orient="records"
)

return burst_list

def plot_critical_events_per_aggregation_time(self):
critical_events = self.get_events_by_log_level("CRITICAL")
critical_events_grouped_by_time = (
critical_events.groupby([DataConstants.AGGREGATIONTIME, "event"])\
.size().reset_index(name="count")
critical_events.groupby([DataConstants.AGGREGATIONTIME, "event"])
.size()
.reset_index(name="count")
)

pivot_critical_events_by_hour = critical_events_grouped_by_time.pivot(
@@ -124,15 +134,14 @@ def plot_critical_events_per_aggregation_time(self):
"Events",
)



def plot_link_up_down_count_per_aggregation_time(self):
links_events = self._log_data_sorted[
(self._log_data_sorted["event"] == "Link is up")
|
(self._log_data_sorted["event"] == "Link went down")
| (self._log_data_sorted["event"] == "Link went down")
]
grouped_links_events = links_events.groupby([DataConstants.AGGREGATIONTIME, "event"])
grouped_links_events = links_events.groupby(
[DataConstants.AGGREGATIONTIME, "event"]
)
counted_links_events_by_time = grouped_links_events.size().reset_index(
name="count"
)
@@ -141,9 +150,5 @@ def plot_link_up_down_count_per_aggregation_time(self):
index=DataConstants.AGGREGATIONTIME, columns="event", values="count"
).fillna(0)
self._save_pivot_data_in_bars(
pivot_links_data,
"Time",
"Number of Events",
"Link up/down events",
"Event"
pivot_links_data, "Time", "Number of Events", "Link up/down events", "Event"
)
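To make the burst logic from get_critical_event_bursts (earlier in this file's diff) concrete, here is a toy end-to-end run of the same idiom on invented data; "min" is used as the newer spelling of the "T" minute alias that appears in the diff:

# Toy illustration of the burst-detection idiom: floor timestamps to the minute,
# count events per (minute, event, event_type), keep groups whose count >= n.
import pandas as pd

events = pd.DataFrame(
    {
        "timestamp": pd.to_datetime(
            [
                "2024-11-20 10:00:05",
                "2024-11-20 10:00:20",
                "2024-11-20 10:00:40",
                "2024-11-20 10:03:00",
            ]
        ),
        "event": ["Switch is down"] * 4,      # invented event name
        "event_type": ["HARDWARE"] * 4,       # invented event type
    }
)

n = 3
events["minute"] = events["timestamp"].dt.floor("min")
counts = (
    events.groupby(["minute", "event", "event_type"])
    .size()
    .reset_index(name="count")
)
bursts = counts[counts["count"] >= n]
print(bursts.rename(columns={"minute": "timestamp"}).to_dict(orient="records"))
# -> one burst record for the 10:00 minute (3 events); the lone 10:03 event is ignored.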
