
issue: 4176956: move the text handling to the analyzers classes #284

Merged · 26 commits · Dec 4, 2024
10 changes: 5 additions & 5 deletions .github/workflows/ufm_log_analyzer_ci_workflow.yml
@@ -12,10 +12,10 @@ jobs:

   steps:
     - name: Checkout code
-      uses: actions/checkout@main
+      uses: actions/checkout@v4

     - name: Set up Python
-      uses: actions/setup-python@main
+      uses: actions/setup-python@v4
       with:
         python-version: 3.9

@@ -38,10 +38,10 @@ jobs:

   steps:
     - name: Checkout code
-      uses: actions/checkout@main
+      uses: actions/checkout@v4

     - name: Set up Python
-      uses: actions/setup-python@main
+      uses: actions/setup-python@v4
       with:
         python-version: 3.9

@@ -50,6 +50,6 @@
       SCRIPT_DIR="plugins/ufm_log_analyzer_plugin"
       cd $SCRIPT_DIR

-      pip install ruff
+      pip install ruff==0.7.3

       ruff format --diff --check src/loganalyze
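Pinning actions/checkout and actions/setup-python to released tags (v4) instead of main, and ruff to an exact version (0.7.3), makes the CI runs reproducible: an upstream change to main, or a newer ruff release with different formatting rules, could otherwise break this workflow without any change in the repository itself.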
68 changes: 5 additions & 63 deletions plugins/ufm_log_analyzer_plugin/src/loganalyze/log_analyzer.py
@@ -369,7 +369,10 @@ def create_analyzer(
     end = time.perf_counter()
     log.LOGGER.debug(f"Took {end-start:.3f} to load the parsed data")

-    all_images_outputs_and_title = ufm_top_analyzer.full_analysis()
+    all_images_outputs_and_title, dataframes_for_pdf, txt_for_pdf = (
+        ufm_top_analyzer.full_analysis_all_analyzers()
+    )

     png_images = []
     images_and_title_to_present = []
     for image_title in all_images_outputs_and_title:
@@ -388,69 +391,8 @@
     )

     pdf = PDFCreator(pdf_path, pdf_header, png_images, text_to_show_in_pdf)
-    dataframes_for_pdf = []
-    fabric_info = (
-        ibdiagnet_analyzer.get_fabric_size()
-        if ibdiagnet_analyzer
-        else "No Fabric Info found"
-    )
-    dataframes_for_pdf.append(("Fabric info", fabric_info))
-    if links_flapping_analyzer:
-        dataframes_for_pdf.append(
-            (
-                "Link Flapping past week",
-                links_flapping_analyzer.get_link_flapping_last_week(),
-            )
-        )
-    lists_to_add = []
-    critical_events_headers = ["timestamp", "event_type", "event", "count"]
-    lists_to_add.append(
-        (
-            event_log_analyzer.get_critical_event_bursts(),
-            "More than 5 events burst over a minute",
-            critical_events_headers,
-        )
-    )
-
-    existing_telemetry_analyzers = []
-    for telemetry_analyzer in [
-        ibdianget_2_ports_primary_analyzer,
-        ibdianget_2_ports_secondary_analyzer,
-    ]:
-        if telemetry_analyzer:
-            existing_telemetry_analyzers.append(telemetry_analyzer)
-
-    for cur_telemetry in existing_telemetry_analyzers:
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} Telemetry iteration time",
-                cur_telemetry.get_last_iterations_time_stats(),
-            )
-        )
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} "
-                "Telemetry iteration first and last timestamps",
-                cur_telemetry.get_first_last_iteration_timestamp(),
-            )
-        )
-        dataframes_for_pdf.append(
-            (
-                f"{cur_telemetry.telemetry_type} Telemetry fabric size",
-                cur_telemetry.get_number_of_switches_and_ports(),
-            )
-        )
-        lists_to_add.append(
-            (
-                [cur_telemetry.get_number_of_core_dumps()],
-                f"{cur_telemetry.telemetry_type} "
-                "number of core dumps found in the logs",
-                ["Amount"],
-            )
-        )
+    pdf.create_pdf(dataframes_for_pdf, txt_for_pdf)

-    # PDF creator gets all the images and to add to the report
-    pdf.create_pdf(dataframes_for_pdf, lists_to_add)
     # Generated a report that can be located in the destination
     log.LOGGER.info("Analysis is done, please see the following outputs:")
     for image, title in images_and_title_to_present:
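The net effect of this file's changes: log_analyzer.py used to assemble every PDF table inline (the ~60 deleted lines above); now each analyzer contributes its tables and text while it runs, and the caller only forwards the aggregated results. A condensed sketch of the new contract, using only names that appear in the diff (surrounding setup omitted):

```python
# The caller now receives three aggregated collections instead of
# building the tables itself.
all_images_outputs_and_title, dataframes_for_pdf, txt_for_pdf = (
    ufm_top_analyzer.full_analysis_all_analyzers()
)
pdf = PDFCreator(pdf_path, pdf_header, png_images, text_to_show_in_pdf)
pdf.create_pdf(dataframes_for_pdf, txt_for_pdf)
```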
@@ -52,11 +52,15 @@ class BaseImageCreator:
     def __init__(self, dest_image_path):
         self._dest_image_path = dest_image_path
         self._images_created = []
+        self._dataframes_for_pdf = []
+        self._txt_for_pdf = []
         self._funcs_for_analysis = set()

     def _save_data_based_on_timestamp(
         self, data_to_plot, x_label, y_label, title, large_sample=False
     ):
+        if data_to_plot.empty:
+            return
         with plt.ion():
             log.LOGGER.debug(f"saving {title}")
             plt.figure(figsize=(12, 6))
@@ -156,7 +160,14 @@ def full_analysis(self):
         except:  # pylint: disable=bare-except
             pass

-        return self._images_created if len(self._images_created) > 0 else []
+    def get_images_created(self):
+        return self._images_created
+
+    def get_dataframes_for_pdf(self):
+        return self._dataframes_for_pdf
+
+    def get_txt_for_pdf(self):
+        return self._txt_for_pdf


 class BaseAnalyzer(BaseImageCreator):
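These getters complete the accumulator pattern: each analyzer's registered functions fill `_images_created`, `_dataframes_for_pdf` and `_txt_for_pdf` as side effects, and the top analyzer reads them back afterwards. A minimal sketch of the driver this relies on, assuming `full_analysis()` iterates `_funcs_for_analysis` (consistent with the bare-except fragment visible in the hunk above; the exact body is illustrative, not the repository's code):

```python
def full_analysis(self):
    # Run every registered analysis function; each one plots images
    # and/or appends (title, dataframe) or text tuples to the
    # accumulators rather than returning results to the caller.
    for func in self._funcs_for_analysis:
        try:
            func()
        except:  # pylint: disable=bare-except
            pass  # one failing analysis must not abort the others
```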
@@ -194,11 +205,14 @@ def __init__(
     def _remove_empty_lines_from_csv(input_file):
         temp_file = input_file + ".temp"

-        with open(
-            input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as infile, open(
-            temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as outfile:
+        with (
+            open(
+                input_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as infile,
+            open(
+                temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as outfile,
+        ):
             reader = csv.reader(infile)
             writer = csv.writer(outfile)

Review thread on the `with (` line (marked resolved):
  Collaborator: Please revert this display
  Contributor Author: ruff requires this display
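The new form is the parenthesized multi-context-manager syntax, which is what the author means by "ruff requires this display": ruff's Black-compatible formatter splits long `with` statements this way, so each `open(...)` gets its own line with no backslash continuations. The syntax is documented from Python 3.10, though CPython 3.9 (the version this CI pins) already accepts it. A standalone sketch with hypothetical file names:

```python
# Two files managed by one parenthesized `with` group.
with (
    open("input.csv", "r", newline="", encoding="utf-8") as infile,
    open("output.csv", "w", newline="", encoding="utf-8") as outfile,
):
    outfile.write(infile.read())
```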

@@ -222,11 +236,14 @@ def fix_lines_with_no_timestamp(csvs):
         temp_file = csv_file + ".temp"
         BaseAnalyzer._remove_empty_lines_from_csv(csv_file)
         fixed_lines = 0
-        with open(
-            csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as infile, open(
-            temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as outfile:
+        with (
+            open(
+                csv_file, "r", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as infile,
+            open(
+                temp_file, "w", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as outfile,
+        ):
             reader = csv.reader(infile)
             writer = csv.writer(outfile)
             current_line = None
@@ -43,11 +43,14 @@ def _extract_ufm_version(logs_csvs):
         temp_file = csv_file + ".temp"

         # Open the input CSV file for reading
-        with open(
-            csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as infile, open(
-            temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
-        ) as outfile:
+        with (
+            open(
+                csv_file, mode="r", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as infile,
+            open(
+                temp_file, mode="w", newline="", encoding=DataConstants.UTF8ENCODING
+            ) as outfile,
+        ):
             reader = csv.DictReader(infile)
             fieldnames = reader.fieldnames  # Get the header from the CSV
             writer = csv.DictWriter(outfile, fieldnames=fieldnames)
@@ -104,8 +107,5 @@ def print_exceptions_per_time_count(self):
         )

     def full_analysis(self):
-        """
-        Returns a list of all the graphs created and their title
-        """
+        super().full_analysis()
         self.print_exceptions()
-        return super().full_analysis()
@@ -28,6 +28,7 @@ def __init__(self, logs_csvs: List[str], hours: int, dest_image_path):
             self.plot_critical_events_per_aggregation_time,
             self.plot_link_up_down_count_per_aggregation_time,
             self.plot_top_n_critical_events_over_time,
+            self.get_critical_event_bursts,
         }

     # Function to split "object_id" into "device" and "description"
@@ -107,12 +108,12 @@ def get_critical_event_bursts(self, n=5):
"Critical Event Bursts",
)

# Convert the result to a list of dictionaries for returning
burst_list = bursts.rename(columns={"minute": "timestamp"}).to_dict(
orient="records"
# Add bursts to dataframes_for_pdf
df_to_add = (
"More than 5 events burst over a minute",
bursts,
)

return burst_list
self._dataframes_for_pdf.append(df_to_add)

def plot_critical_events_per_aggregation_time(self):
critical_events = self.get_events_by_log_level("CRITICAL")
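With `get_critical_event_bursts` registered in `_funcs_for_analysis`, the burst table is now produced during `full_analysis()` and reaches the PDF through `_dataframes_for_pdf` instead of being fetched by log_analyzer.py. For readers unfamiliar with the rule the title describes ("more than 5 events burst over a minute"), here is a hypothetical pandas sketch; the column names follow the headers in the deleted log_analyzer.py code, and this is not the plugin's actual implementation:

```python
import pandas as pd

def find_bursts(events: pd.DataFrame, n: int = 5) -> pd.DataFrame:
    # `events` is assumed to have a datetime "timestamp" column plus
    # "event_type" and "event" columns.
    per_minute = (
        events.set_index("timestamp")
        .groupby([pd.Grouper(freq="min"), "event_type", "event"])
        .size()
        .reset_index(name="count")
    )
    # Keep only minutes in which one event fired more than n times.
    return per_minute[per_minute["count"] > n]
```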
@@ -34,7 +34,13 @@ def __init__(
             self._log_data_sorted[col] = pd.to_numeric(
                 self._log_data_sorted[col], errors="coerce"
             ).astype("Int64")
-        self._funcs_for_analysis = {self.plot_iteration_time_over_time}
+        self._funcs_for_analysis = {
+            self.plot_iteration_time_over_time,
+            self.save_last_iterations_time_stats,
+            self.save_first_last_iteration_timestamp,
+            self.save_number_of_switches_and_ports,
+            self.save_number_of_core_dumps,
+        }
         # Based on the log path, decided if this is primary or secondary
         if "ufm_logs" in logs_csvs[0]:
             self.telemetry_type = "primary"
@@ -52,7 +58,7 @@ def get_collectx_versions(self):
]["data"].unique()
return unique_collectx_versions

def get_number_of_switches_and_ports(self):
def save_number_of_switches_and_ports(self):
"""
Generate summary statistics for 'total_devices_ports' data.
This function calculates the average, maximum, minimum
@@ -100,7 +106,12 @@ def get_number_of_switches_and_ports(self):

summary_df = pd.DataFrame(summary_stats)

return summary_df
self._dataframes_for_pdf.append(
(
f"{self.telemetry_type} telemetry fabric size",
summary_df,
)
)

def analyze_iteration_time(self, threshold=0.15):
"""
@@ -160,17 +171,29 @@ def analyze_iteration_time(self, threshold=0.15):
             self._last_timestamp_of_logs = last_timestamp
         return stats_df

-    def get_first_last_iteration_timestamp(self):
+    def save_first_last_iteration_timestamp(self):
         if not self._first_timestamp_of_logs or not self._last_timestamp_of_logs:
             self.analyze_iteration_time()
         times = {
             "first": str(self._first_timestamp_of_logs),
             "last": str(self._last_timestamp_of_logs),
         }
-        return pd.DataFrame([times])
+        first_last_it = pd.DataFrame([times])
+        self._dataframes_for_pdf.append(
+            (
+                f"{self.telemetry_type} "
+                "telemetry iteration first and last timestamps",
+                first_last_it,
+            )
+        )

-    def get_last_iterations_time_stats(self):
-        return self._iteration_time_stats
+    def save_last_iterations_time_stats(self):
+        self._dataframes_for_pdf.append(
+            (
+                f"{self.telemetry_type} telemetry iteration time",
+                self._iteration_time_stats(),
+            )
+        )

     def plot_iteration_time_over_time(self):
         if self._iteration_time_data is None:
@@ -188,8 +211,15 @@ def plot_iteration_time_over_time(self):
             large_sample=True,
         )

-    def get_number_of_core_dumps(self):
+    def save_number_of_core_dumps(self):
         core_dumps = self._log_data_sorted[
             self._log_data_sorted["type"] == "timeout_dump_core"
         ]
-        return {"Amount": len(core_dumps)}
+        num = {"Amount": len(core_dumps)}
+        self._txt_for_pdf.append(
+            (
+                [num],
+                f"{self.telemetry_type} number of core dumps found in the logs",
+                ["Amount"],
+            )
+        )
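Note that `_txt_for_pdf` entries keep the `(rows, title, headers)` tuple shape that the deleted `lists_to_add` code in log_analyzer.py used; a hypothetical two-core-dump entry would be `([{"Amount": 2}], "secondary number of core dumps found in the logs", ["Amount"])`.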
@@ -20,6 +20,7 @@
 class IBDIAGNETLogAnalyzer(BaseAnalyzer):
     def __init__(self, logs_csvs: List[str], hours: int, dest_image_path):
         super().__init__(logs_csvs, hours, dest_image_path, sort_timestamp=False)
+        self._funcs_for_analysis = {self.save_fabric_size}

     def print_fabric_size(self):
         fabric_info = self.get_fabric_size()
@@ -28,9 +29,10 @@ def print_fabric_size(self):
     def get_fabric_size(self):
         return self._log_data_sorted

+    def save_fabric_size(self):
+        fabric_info = self.get_fabric_size()
+        self._dataframes_for_pdf.append(("Fabric info", fabric_info))

-    def full_analysis(self):
-        """
-        Returns a list of all the graphs created and their title
-        """
-        super().full_analysis()
-        self.print_fabric_size()
-        return []

Review thread on the new save_fabric_size:
  Collaborator: Same thing. Use the infrastructure in the base class to run this.
  Contributor Author: fixed
@@ -141,6 +141,5 @@ def plot_link_flapping_last_week(self):
             pivot_table, "Time", "Count", "Link Flapping Count", None
         )

-    def full_analysis(self):
-        self.get_link_flapping_last_week()
-        return super().full_analysis()
+        # Save link_flapping in dataframes_for_pdf
+        self._dataframes_for_pdf.extend([("Link Flapping last week", link_flapping)])
@@ -20,13 +20,19 @@ def __init__(self):
     def add_analyzer(self, analyzer):
         self._analyzers.append(analyzer)

-    def full_analysis(self):
+    def full_analysis_all_analyzers(self):
         """
         Returns a list of all the graphs created and their title
         """
+        for analyzer in self._analyzers:
+            analyzer.full_analysis()
+
         graphs_and_titles = []
+        dataframes = []
+        txt = []
         for analyzer in self._analyzers:
-            tmp_images_list = analyzer.full_analysis()
-            if len(tmp_images_list) > 0:
-                graphs_and_titles.extend(tmp_images_list)
-        return graphs_and_titles
+            graphs_and_titles.extend(analyzer.get_images_created())
+            dataframes.extend(analyzer.get_dataframes_for_pdf())
+            txt.extend(analyzer.get_txt_for_pdf())
+
+        return graphs_and_titles, dataframes, txt
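The two loops are deliberate: the first runs every analyzer to completion so all accumulators are filled; only then does the second gather the images, dataframes and text. A usage sketch — the class name and analyzer variables here are assumed for illustration:

```python
# Register analyzers, run them all, then collect the aggregated outputs.
top = UfmTopAnalyzer()  # assumed class name for this module
top.add_analyzer(events_analyzer)
top.add_analyzer(telemetry_analyzer)
images, dataframes, txt = top.full_analysis_all_analyzers()
```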