Skip to content

Commit

Permalink
graph: Adding CPU clock scaling graph
Browse files Browse the repository at this point in the history
As per issue #39, it could be super useful to have a new type of graph
to represent how the CPU clock performed during a benchmark.

This new graph gives a quick overview of how the CPU frequency behaves
during a scaling benchmark.

The rendering is not as precise as environment graphs, but it gives a
brief overview with the following trade-offs for the y-err bars:
- the min of the y-err bar is the min of the min values
- the mean of the y-err bar is the mean of the mean values
- the max of the y-err bar is the max of the max values

Signed-off-by: Erwan Velu <e.velu@criteo.com>
  • Loading branch information
ErwanAliasr1 committed Oct 28, 2024
1 parent f8c8e15 commit eef217a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 1 deletion.
2 changes: 1 addition & 1 deletion graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def init_matplotlib(args):
fatal(f"Cannot load matplotlib backend engine {args.engine}")


GRAPH_TYPES = ["perf", "perf_watt", "watts"]
GRAPH_TYPES = ["perf", "perf_watt", "watts", "cpu_clock"]


class Graph:
Expand Down
25 changes: 25 additions & 0 deletions graph/scaling.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int:
aggregated_perfs_watt = {} # type: dict[str, dict[str, Any]]
aggregated_watt = {} # type: dict[str, dict[str, Any]]
aggregated_watt_err = {} # type: dict[str, dict[str, Any]]
aggregated_cpu_clock = {} # type: dict[str, dict[str, Any]]
aggregated_cpu_clock_err = {} # type: dict[str, dict[str, Any]]
workers = {} # type: dict[str, list]
logical_core_per_worker = []
perf_list, unit = benches[emp]["metrics"]
Expand All @@ -41,6 +43,8 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int:
aggregated_perfs_watt[perf] = {}
aggregated_watt[perf] = {}
aggregated_watt_err[perf] = {}
aggregated_cpu_clock[perf] = {}
aggregated_cpu_clock_err[perf] = {}
# For every trace file given at the command line
for trace in args.traces:
workers[trace.get_name()] = []
Expand All @@ -63,13 +67,17 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int:
aggregated_perfs_watt[perf][trace.get_name()] = []
aggregated_watt[perf][trace.get_name()] = []
aggregated_watt_err[perf][trace.get_name()] = []
aggregated_cpu_clock[perf][trace.get_name()] = []
aggregated_cpu_clock_err[perf][trace.get_name()] = []

bench.add_perf(
perf,
traces_perf=aggregated_perfs[perf][trace.get_name()],
perf_watt=aggregated_perfs_watt[perf][trace.get_name()],
watt=aggregated_watt[perf][trace.get_name()],
watt_err=aggregated_watt_err[perf][trace.get_name()],
cpu_clock=aggregated_cpu_clock[perf][trace.get_name()],
cpu_clock_err=aggregated_cpu_clock_err[perf][trace.get_name()],
)

# Let's render all graphs types
Expand All @@ -94,6 +102,13 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int:
outfile = f"scaling_watt_{clean_perf}_{bench.get_title_engine_name().replace(' ','_')}"
y_label = "Watts"
y_source = aggregated_watt
elif "cpu_clock" in graph_type:
graph_type_title = (
f"Scaling {graph_type}: {args.traces[0].get_metric_name()}"
)
outfile = f"scaling_cpu_clock_{clean_perf}_{bench.get_title_engine_name().replace(' ','_')}"
y_label = "Mhz"
y_source = aggregated_cpu_clock
else:
graph_type_title = (
f"Scaling {graph_type}: {bench.get_title_engine_name()}"
Expand Down Expand Up @@ -164,6 +179,16 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int:
capsize=4,
label=trace_name,
)
elif y_source == aggregated_cpu_clock:
graph.get_ax().errorbar(
x_serie,
y_serie,
yerr=np.array(aggregated_cpu_clock_err[perf][trace_name]).T,
ecolor=e_color,
color=color_name,
capsize=4,
label=trace_name,
)
else:
graph.get_ax().plot(
x_serie,
Expand Down
37 changes: 37 additions & 0 deletions graph/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ def add_perf(
perf_watt=None,
watt=None,
watt_err=None,
cpu_clock=None,
cpu_clock_err=None,
index=None,
) -> None:
"""Extract performance and power efficiency"""
Expand Down Expand Up @@ -316,6 +318,41 @@ def add_perf(
watt_err.append(metric)
else:
watt_err[index] = metric

if cpu_clock is not None:
mm = self.get_monitoring_metric(Metrics.FREQ)
mean_values = []
min_values = []
max_values = []

for freq_metric in mm:
if freq_metric != "CPU":
continue
# We have to compute metrics of all systems cores
for core in mm[freq_metric]:
# MIN of min ?
# Mean of mean ?
# Max of max ?
min_values.append(min(mm[freq_metric][core].get_min()))
mean_values.append(mean(mm[freq_metric][core].get_mean()))
max_values.append(max(mm[freq_metric][core].get_max()))
min_value = min(min_values)
mean_value = mean(mean_values)
max_value = max(max_values)

if index is None:
cpu_clock.append(mean_value)
else:
cpu_clock[index] = mean_value

# If we want to keep the error distribution to plot error bars
if cpu_clock_err is not None:
metric = (mean_value - min_value, max_value - mean_value)
if index is None:
cpu_clock_err.append(metric)
else:
cpu_clock_err[index] = metric

except ValueError:
fatal(f"No {perf} found in {self.get_bench_name()}")

Expand Down

0 comments on commit eef217a

Please sign in to comment.