-
Notifications
You must be signed in to change notification settings - Fork 137
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a tool capable of processing and plotting the results produced by the new benchmark tool. Also support comparison with reference (assumes libm). Produces 1 graph per (precision, accuracy, arch, compiler) showing performance for various variants (scalar, vector and scalable vector).
- Loading branch information
1 parent
d7901d5
commit 394ca26
Showing
5 changed files
with
564 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Python Libraries Imports | ||
from argparse import ArgumentParser | ||
from pathlib import Path | ||
import pandas as pd | ||
import os | ||
|
||
# Auxiliary File Imports
import preprocessing_validators as ppv | ||
import preprocessing as pp | ||
import plotter as pl | ||
|
||
# This function orchestrates everything: from data processing to graph plotting. | ||
# It is organized as follows: | ||
# 1. Define command line arguments | ||
# 2. Validate command line arguments | ||
# 3. Processing (Stage 1): Convert raw results into organized dataframe | ||
# 4. Processing (Stage 2): Filter results | ||
# 5. Generate graph plot over filtered results | ||
# 6. Save image | ||
# Example command line invocation: | ||
# ./src/libm-benchmarks/plot-tool/plot_results.py | ||
# --result-file <path to result file> | ||
# --reference-file <path to reference result file> | ||
# -v scalar vector128 sve -p double -a u35 -d -y "Throughput Ratio" -o graphs | ||
|
||
|
||
def main():
    """Process benchmark results and save a performance comparison graph.

    Pipeline:
      1. Define command line arguments
      2. Validate command line arguments
      3. Processing (Stage 1): Convert raw results into organized dataframe
      4. Processing (Stage 2): Filter results by variant/precision/accuracy
      5. Generate graph plot over filtered results (optionally vs reference)
      6. Save image as png

    Example command line invocation:
      ./src/libm-benchmarks/plot-tool/plot_results.py
          --result-file <path to result file>
          --reference-file <path to reference result file>
          -v scalar vector128 sve -p double -a u35 -d -y "Throughput Ratio" -o graphs
    """
    # Define command line arguments
    parser = ArgumentParser()
    parser.add_argument(
        "--result-file",
        type=Path,
        required=True,
        help="File with benchmark results (csv format)",
    )
    parser.add_argument(
        "--reference-file",
        type=Path,
        required=False,
        help="File with reference benchmark results (csv format)",
    )
    parser.add_argument(
        "-y",
        "--y-axis",
        choices=[
            "Total Time",
            "Total Time Ratio",
            "Throughput",
            "Throughput Ratio",
            "Throughput Speedup",
        ],
        default="Throughput",
        help="Quantity tracked by y axis",
    )
    parser.add_argument(
        "-v",
        "--variant",
        nargs="+",
        choices=["scalar", "vector128", "vector256", "vector512", "sve"],
        required=True,
        help="Which variant to plot",
    )
    parser.add_argument(
        "-m",
        "--machine",
        required=True,
        help="Which machine did the benchmarks occurred on",
    )
    parser.add_argument(
        "-p",
        "--precision",
        choices=["double", "single"],
        required=True,
        help="Which precision to plot",
    )
    parser.add_argument(
        "-a",
        "--accuracy",
        choices=["u10", "u35"],
        required=True,
        help="Which accuracy to plot",
    )
    parser.add_argument(
        "-d",
        "--drop-intervals",
        action="store_true",
        help="Keep one interval per function (if intervals are sorted will keep lowest interval)",
    )
    parser.add_argument(
        "-o",
        "--output-directory",
        type=Path,
        required=False,
        help="Directory to save output",
    )
    args = parser.parse_args()

    # Validate command line arguments: the result filename encodes
    # (library, architecture, compiler).
    results_filename = str(args.result_file)
    library, architecture, compiler = ppv.filename_validator(results_filename)

    # Convert raw results into organized dataframe
    sleef_df_raw = pp.raw_to_df(results_filename)
    precision = ppv.valid_precision(args.precision)
    accuracy = ppv.valid_accuracy(args.accuracy)

    # Filter results by variant, precision, accuracy
    # One dataframe per variant
    filtered_dfs = []
    for v in args.variant:
        variant = ppv.valid_variant(v)
        filtered_df = pp.filter_results(
            sleef_df_raw,
            precision=precision,
            accuracy=accuracy,
            variant=variant,
            keep_lower_interval=args.drop_intervals,
        )
        filtered_dfs.append(filtered_df)

    # If reference provided, repeat similar process
    ref_filtered_df = pd.DataFrame({"A": []})
    if args.reference_file:
        ref_results_filename = str(args.reference_file)
        library_ref, architecture_ref, compiler_ref = ppv.filename_validator(
            ref_results_filename
        )
        # The reference must come from the same platform (architecture and
        # compiler) but a different library, otherwise the comparison is
        # meaningless. Raise a real exception rather than using `assert`,
        # which is stripped under `python -O`.
        if not (
            architecture == architecture_ref
            and compiler == compiler_ref
            and library != library_ref
        ):
            raise ValueError(
                "reference file must share architecture and compiler with the "
                "result file and come from a different library"
            )
        # Convert raw results into organized dataframe
        ref_df_raw = pp.raw_to_df(ref_results_filename)
        # Filter results by variant, precision, accuracy
        # Note: for now we fix u10 scalar routines in the reference library (ie libm) for comparison.
        ref_filtered_df = pp.filter_results(
            ref_df_raw,
            precision=precision,
            accuracy="u10",
            variant="scalar",
            keep_lower_interval=args.drop_intervals,
        )

    # Plot results. The title encodes the graph parameters as
    # graph-{precision}-{accuracy}-{compiler}-{architecture}-{machine};
    # the plotter parses this exact dash-separated format.
    graph_title = (
        f"graph-{precision}-{accuracy}-{compiler}-{architecture}-{args.machine}"
    )
    graph_plot = pl.plot_graph(
        filtered_dfs,
        ref_df=ref_filtered_df,
        y_col=args.y_axis,
        saving_title=graph_title,
    )

    # Save image. -o is optional, so fall back to the current directory
    # instead of crashing on None; mkdir(parents=True, exist_ok=True) also
    # avoids the check-then-create race of is_dir() + os.mkdir and creates
    # missing parent directories. The filename now matches the graph title
    # (including the machine name) so results from different machines do not
    # overwrite each other.
    output_directory = args.output_directory or Path(".")
    output_directory.mkdir(parents=True, exist_ok=True)
    graph_plot.write_image(
        str(output_directory / f"{graph_title}.png"),
        format="png",
    )
    return
|
||
|
||
# Script entry point: run the plotting pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
import plotly.graph_objects as go | ||
import pandas as pd | ||
|
||
|
||
def get_legend(variant, arch):
    """Return a human-readable legend label for a (variant, arch) pair.

    Known architecture/variant combinations are mapped to their marketing
    names (e.g. AdvSIMD, SSE, AVX2); anything unrecognized falls back to
    the raw variant string.
    """
    legend_names = {
        "aarch64": {
            "vector128": "AdvSIMD",
            "sve": "SVE 256bit",
        },
        "x86": {
            "vector128": "SSE",
            "vector256": "AVX2",
            "vector512": "AVX512",
        },
    }
    return legend_names.get(arch, {}).get(variant, variant)
|
||
|
||
# Given a filtered dataframe, it extracts the data necessary in this dataframe | ||
# for the graph we want to build: | ||
# Information for the x axis: Function and interval data | ||
# Information for the y axis: Performance values (Total Time and Throughput) | ||
# The headers will be changed so that they contain the metric and the variant they relate to | ||
# Example: Total Time - sleef scalar | ||
# This is convenient for the graphing step, where the first part can be used to further filtering | ||
# the dataframe, and the latter part will be used as legends for the plots in the graph. | ||
# Given a filtered dataframe, extract the data needed for the graph:
#   x axis: concatenated function name + interval ("Fun-Interval", used as index)
#   y axis: performance values (Total Time and Throughput)
# Column headers are rewritten to "<metric> - <library> <variant legend> <extra>"
# (e.g. "Total Time - sleef scalar "). The metric prefix is used later to
# filter the dataframe, and the part after "-" becomes the plot legend.
def extract_coordinates(filtered_df, extra_legend=""):
    first_row = filtered_df.iloc[0]
    label = (
        f'{first_row["Library"]} '
        f'{get_legend(first_row["Variant"], first_row["Architecture"])} '
        f"{extra_legend}"
    )
    coordinates = pd.DataFrame(
        {
            "Fun-Interval": filtered_df["Function"] + filtered_df["Interval"],
            f"Throughput - {label}": filtered_df["Throughput"],
            f"Total Time - {label}": filtered_df["Total Time"],
        }
    )
    return coordinates.set_index("Fun-Interval")
|
||
|
||
# Given a dataframe with all the information necessary to fill x and y values, so that | ||
# we can plot a performance graph. | ||
# The y_col value determines the performance metric that will be shown in the y axis in the graph | ||
# It will also be used to further filter and process the dataframes. | ||
# Given a dataframe with all the information necessary to fill x and y values, so that
# we can plot a performance graph.
# The y_col value determines the performance metric that will be shown in the y axis in the graph
# It will also be used to further filter and process the dataframes.
def plot_graph_from_coordinates(
    coordinates_df, y_col="Throughput", graph_title="graph-pr-acc-comp-arch"
):
    """Build a plotly Figure from a coordinates dataframe.

    Parameters:
      coordinates_df: dataframe indexed by "Fun-Interval", with one column
        per (metric, variant) pair named like "Throughput - sleef scalar".
      y_col: which metric to plot ("Total Time", "Throughput", their
        "... Ratio" forms, or "Throughput Speedup"). Ratio/speedup modes
        require a reference column (name containing "ref") to be present.
      graph_title: dash-separated string of exactly the form
        graph-{precision}-{accuracy}-{compiler}-{architecture}-{machine};
        it is split on "-" to build the figure title.

    Returns: a plotly.graph_objects.Figure (bars per variant; in ratio or
    speedup mode the reference is drawn as a dashed y=1 line).
    """
    # y_col can be Throughput, Total Time, Throughput Ratio or Total Time Ratio.
    # In the coordinates_df, the values don't show ratios, which means they only
    # contain Total Time and Throughput information. We further filter this
    # dataframe so that it only shows the quantity we need (Total Time or Throughput)
    # according to the first word in of y_col
    coordinates_df = coordinates_df.filter(like=y_col.split()[0], axis=1)

    ratio = "ratio" in y_col.lower()
    speedup = "speedup" in y_col.lower()
    if ratio:
        # The program will fail here (as expected) if reference not provided
        # In order to divide all the columns by the reference, we convert the reference
        # into series and divide all columns in this dataframe by the ref series.
        # A trick to convert a column in a dataframe into series is transposing it
        # and then applying iloc function to it
        ref_df = coordinates_df.filter(regex="ref").T.iloc[0]
        # ratio = result / reference, broadcast row-wise over every column
        coordinates_df = pd.DataFrame(
            coordinates_df.values / ref_df.values[:, None],
            index=coordinates_df.index,
            columns=coordinates_df.columns,
        )
    elif speedup:
        # The program will fail here (as expected) if reference not provided
        # In order to divide all the columns by the reference, we convert the reference
        # into series and divide all columns in this dataframe by the ref series.
        # A trick to convert a column in a dataframe into series is transposing it
        # and then applying iloc function to it
        ref_df = coordinates_df.filter(regex="ref").T.iloc[0]
        # speedup = reference / result (inverse of the ratio division above)
        coordinates_df = pd.DataFrame(
            ref_df.values[:, None] / coordinates_df.values,
            index=coordinates_df.index,
            columns=coordinates_df.columns,
        )

    # fix naming in y axis by adding units (ratio does not have units)
    elif "throughput" in y_col.lower():
        y_col = f"{y_col} (ns/el)"
    elif "total time" in y_col.lower():
        y_col = f"{y_col} (ns)"

    x_vector = coordinates_df.index
    fig = go.Figure()
    for (columnName, columnData) in coordinates_df.items():
        # In ratio mode, ref is just an horizontal line y=1.0
        # In the coordinates dataframe, the columns headers are expected to be filled
        # in a way that they contain what metric the column contains (Total Time or Throughput),
        # and what variant the results belong to (sleef scalar, sleef sve ...), so they look like
        # Throughput - sleef scalar for example.
        # The first part was used earlier for the y_axis naming
        # We use the second part after "-" for the legends title (in this case example
        # would be "sleef scalar")
        legend = columnName.split("-")[1]
        if "ref" in columnName and (ratio or speedup):
            fig.add_trace(
                go.Scatter(
                    x=x_vector,
                    y=[1 for x in x_vector],
                    name=legend,
                    line=dict(width=2, dash="dash"),
                    mode="lines",
                )
            )
            continue
        fig.add_trace(go.Bar(name=legend, x=x_vector, y=columnData))

    # Configure Title
    # The graphtitle parameter passed on to this function should take the
    # following format graph-{precision}-{accuracy}-{compiler}-{architecture}-{machine}
    _, precision, accuracy, _, _, machine = graph_title.split("-")
    long_acc = {"u10": "1ULP", "u35": "3.5ULP"}[accuracy]
    fig.update_layout(
        title=f"Comparison between system libm (GLIBC 2.35) and SLEEF performance<br>for {precision} precision {long_acc} functions on {machine}",
        barmode="group",
        xaxis_title="function name and interval",
        yaxis_title=y_col,
        legend_title="Variant",
        width=800,
        height=600,
    )
    return fig
|
||
|
||
# Given an array of filtered dataframes (and potentially a similar style reference dataframe) | ||
# it merges all of them in a single dataframe with the information necessary to build the graph. | ||
# This dataframe has "Function Interval" information as indexes, and each column will | ||
# correspond to a performance quantity per variant (Example: Total Time - scalar) | ||
# Given an array of filtered dataframes (and potentially a similar style reference
# dataframe), merge them into a single coordinates dataframe and plot it.
# The merged dataframe is indexed by "Fun-Interval"; each column holds one
# performance quantity for one variant (e.g. "Total Time - scalar").
def plot_graph(
    filtered_df_array,
    ref_df,
    y_col="Throughput (ns)",
    saving_title="graph-pr-acc-comp-arch",
):
    per_variant_coords = [extract_coordinates(df) for df in filtered_df_array]

    # Outer-join the per-variant dataframes (pd.concat) so every supported
    # function appears; functions missing from some variant become nan,
    # which we report and replace with 0.
    merged = pd.concat(per_variant_coords, axis=1)
    if merged.isna().any(axis=None):
        print("Warning: join resulted in nan values")
        merged = merged.fillna(0)

    if not ref_df.empty:
        # Left-join against the reference: only functions already present in
        # the result dataframe are of interest, so reindex on its index.
        # (No nan values should be produced.)
        reference_coords = extract_coordinates(ref_df, extra_legend="(ref)")
        merged = pd.concat([merged, reference_coords], axis=1).reindex(merged.index)

    return plot_graph_from_coordinates(merged, y_col=y_col, graph_title=saving_title)
Oops, something went wrong.