From eef217ae24802247ec3dd15bf4c146b71e2d98ef Mon Sep 17 00:00:00 2001 From: Erwan Velu Date: Fri, 25 Oct 2024 16:49:51 +0200 Subject: [PATCH] graph: Adding CPU clock scaling graph As per issue #39, it could be super useful to have a new type of graph to represent how the CPU clock performed during a benchmark. This new graph gives a quick overview of how the CPU frequency behaves during a scaling benchmark. The rendering is not as precise as environment graphs, but it gives a brief overview with the following trade-offs for the y-err bars: - min of the yerr-bar, is the min of min values - mean of the yerr-bar, is the mean of mean values - max of the yerr-bar, is the max of the max values Signed-off-by: Erwan Velu --- graph/graph.py | 2 +- graph/scaling.py | 25 +++++++++++++++++++++++++ graph/trace.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/graph/graph.py b/graph/graph.py index a35598b..cebfaac 100644 --- a/graph/graph.py +++ b/graph/graph.py @@ -17,7 +17,7 @@ def init_matplotlib(args): fatal(f"Cannot load matplotlib backend engine {args.engine}") -GRAPH_TYPES = ["perf", "perf_watt", "watts"] +GRAPH_TYPES = ["perf", "perf_watt", "watts", "cpu_clock"] class Graph: diff --git a/graph/scaling.py b/graph/scaling.py index 78af7f9..bb448b8 100644 --- a/graph/scaling.py +++ b/graph/scaling.py @@ -25,6 +25,8 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int: aggregated_perfs_watt = {} # type: dict[str, dict[str, Any]] aggregated_watt = {} # type: dict[str, dict[str, Any]] aggregated_watt_err = {} # type: dict[str, dict[str, Any]] + aggregated_cpu_clock = {} # type: dict[str, dict[str, Any]] + aggregated_cpu_clock_err = {} # type: dict[str, dict[str, Any]] workers = {} # type: dict[str, list] logical_core_per_worker = [] perf_list, unit = benches[emp]["metrics"] @@ -41,6 +43,8 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int: aggregated_perfs_watt[perf] = {} 
aggregated_watt[perf] = {} aggregated_watt_err[perf] = {} + aggregated_cpu_clock[perf] = {} + aggregated_cpu_clock_err[perf] = {} # For every trace file given at the command line for trace in args.traces: workers[trace.get_name()] = [] @@ -63,6 +67,8 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int: aggregated_perfs_watt[perf][trace.get_name()] = [] aggregated_watt[perf][trace.get_name()] = [] aggregated_watt_err[perf][trace.get_name()] = [] + aggregated_cpu_clock[perf][trace.get_name()] = [] + aggregated_cpu_clock_err[perf][trace.get_name()] = [] bench.add_perf( perf, @@ -70,6 +76,8 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int: perf_watt=aggregated_perfs_watt[perf][trace.get_name()], watt=aggregated_watt[perf][trace.get_name()], watt_err=aggregated_watt_err[perf][trace.get_name()], + cpu_clock=aggregated_cpu_clock[perf][trace.get_name()], + cpu_clock_err=aggregated_cpu_clock_err[perf][trace.get_name()], ) # Let's render all graphs types @@ -94,6 +102,13 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int: outfile = f"scaling_watt_{clean_perf}_{bench.get_title_engine_name().replace(' ','_')}" y_label = "Watts" y_source = aggregated_watt + elif "cpu_clock" in graph_type: + graph_type_title = ( + f"Scaling {graph_type}: {args.traces[0].get_metric_name()}" + ) + outfile = f"scaling_cpu_clock_{clean_perf}_{bench.get_title_engine_name().replace(' ','_')}" + y_label = "Mhz" + y_source = aggregated_cpu_clock else: graph_type_title = ( f"Scaling {graph_type}: {bench.get_title_engine_name()}" @@ -164,6 +179,16 @@ def scaling_graph(args, output_dir, job: str, traces_name: list) -> int: capsize=4, label=trace_name, ) + elif y_source == aggregated_cpu_clock: + graph.get_ax().errorbar( + x_serie, + y_serie, + yerr=np.array(aggregated_cpu_clock_err[perf][trace_name]).T, + ecolor=e_color, + color=color_name, + capsize=4, + label=trace_name, + ) else: graph.get_ax().plot( x_serie, diff --git 
a/graph/trace.py b/graph/trace.py index beed903..36fb7ae 100644 --- a/graph/trace.py +++ b/graph/trace.py @@ -232,6 +232,8 @@ def add_perf( perf_watt=None, watt=None, watt_err=None, + cpu_clock=None, + cpu_clock_err=None, index=None, ) -> None: """Extract performance and power efficiency""" @@ -316,6 +318,41 @@ def add_perf( watt_err.append(metric) else: watt_err[index] = metric + + if cpu_clock is not None: + mm = self.get_monitoring_metric(Metrics.FREQ) + mean_values = [] + min_values = [] + max_values = [] + + for freq_metric in mm: + if freq_metric != "CPU": + continue + # We have to compute metrics of all systems cores + for core in mm[freq_metric]: + # MIN of min ? + # Mean of mean ? + # Max of max ? + min_values.append(min(mm[freq_metric][core].get_min())) + mean_values.append(mean(mm[freq_metric][core].get_mean())) + max_values.append(max(mm[freq_metric][core].get_max())) + min_value = min(min_values) + mean_value = mean(mean_values) + max_value = max(max_values) + + if index is None: + cpu_clock.append(mean_value) + else: + cpu_clock[index] = mean_value + + # If we want to keep the error distribution to plot error bars + if cpu_clock_err is not None: + metric = (mean_value - min_value, max_value - mean_value) + if index is None: + cpu_clock_err.append(metric) + else: + cpu_clock_err[index] = metric + except ValueError: fatal(f"No {perf} found in {self.get_bench_name()}")