diff --git a/src/libm-benchmarks/plot-tool/plot_results.py b/src/libm-benchmarks/plot-tool/plot_results.py
new file mode 100755
index 00000000..c4f0d5d6
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/plot_results.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# Python Library Imports
+from argparse import ArgumentParser
+from pathlib import Path
+import pandas as pd
+import os
+
+# Auxiliary File Imports
+import preprocessing_validators as ppv
+import preprocessing as pp
+import plotter as pl
+
+# This function orchestrates everything, from data processing to graph plotting.
+# It is organized as follows:
+# 1. Define command line arguments
+# 2. Validate command line arguments
+# 3. Processing (Stage 1): Convert raw results into an organized dataframe
+# 4. Processing (Stage 2): Filter results
+# 5. Generate graph plot over filtered results
+# 6. Save image
+# Example command line invocation:
+# ./src/libm-benchmarks/plot-tool/plot_results.py
+# --result-file
+# --reference-file
+# -v scalar vector128 sve -p double -a u35 -d -y "Throughput Ratio" -o graphs
+
+
+def main():
+    # Define command line arguments
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--result-file",
+        type=Path,
+        required=True,
+        help="File with benchmark results (csv format)",
+    )
+    parser.add_argument(
+        "--reference-file",
+        type=Path,
+        required=False,
+        help="File with reference benchmark results (csv format)",
+    )
+    parser.add_argument(
+        "-y",
+        "--y-axis",
+        choices=[
+            "Total Time",
+            "Total Time Ratio",
+            "Throughput",
+            "Throughput Ratio",
+            "Throughput Speedup",
+        ],
+        default="Throughput",
+        help="Quantity tracked by the y axis",
+    )
+    parser.add_argument(
+        "-v",
+        "--variant",
+        nargs="+",
+        choices=["scalar", "vector128", "vector256", "vector512", "sve"],
+        required=True,
+        help="Which variant to plot",
+    )
+    parser.add_argument(
+        "-m",
+        "--machine",
+        required=True,
+        help="Machine the benchmarks were run on",
+    )
+    parser.add_argument(
+        "-p",
+        "--precision",
+        choices=["double", "single"],
+        required=True,
+        help="Which precision to plot",
+    )
+    parser.add_argument(
+        "-a",
+        "--accuracy",
+        choices=["u10", "u35"],
+        required=True,
+        help="Which accuracy to plot",
+    )
+    parser.add_argument(
+        "-d",
+        "--drop-intervals",
+        action="store_true",
+        help="Keep only one interval per function (if intervals are sorted, the lowest interval is kept)",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-directory",
+        type=Path,
+        default=Path("."),
+        help="Directory to save output (default: current directory)",
+    )
+    args = parser.parse_args()
+
+    # Validate command line arguments
+    results_filename = str(args.result_file)
+    ref_results_filename = str(args.reference_file)
+    library, architecture, compiler = ppv.filename_validator(results_filename)
+
+    # Convert raw results into an organized dataframe
+    sleef_df_raw = pp.raw_to_df(results_filename)
+    precision = ppv.valid_precision(args.precision)
+    accuracy = ppv.valid_accuracy(args.accuracy)
+
+    # Filter results by variant, precision, accuracy
+    # One dataframe per variant
+    filtered_dfs = []
+    for v in args.variant:
+        variant = ppv.valid_variant(v)
+        filtered_df = pp.filter_results(
+            sleef_df_raw,
+            precision=precision,
+            accuracy=accuracy,
+            variant=variant,
+            keep_lower_interval=args.drop_intervals,
+        )
+        filtered_dfs.append(filtered_df)
+
+    # If a reference is provided, repeat a similar process
+    ref_filtered_df = pd.DataFrame({"A": []})
+    if args.reference_file:
+        library_ref, architecture_ref, compiler_ref = ppv.filename_validator(
+            ref_results_filename
+        )
+        assert (
+            architecture == architecture_ref
+            and compiler == compiler_ref
+            and library != library_ref
+        )
+        # Convert raw reference results into an organized dataframe
+        ref_df_raw = pp.raw_to_df(ref_results_filename)
+        # Filter results by variant, precision, accuracy
+        # Note: for now we fix u10 scalar routines in the reference library (i.e. libm) for comparison.
+        ref_filtered_df = pp.filter_results(
+            ref_df_raw,
+            precision=precision,
+            accuracy="u10",
+            variant="scalar",
+            keep_lower_interval=args.drop_intervals,
+        )
+
+    # Plot results
+    graph_plot = pl.plot_graph(
+        filtered_dfs,
+        ref_df=ref_filtered_df,
+        y_col=args.y_axis,
+        saving_title=f"graph-{precision}-{accuracy}-{compiler}-{architecture}-{args.machine}",
+    )
+
+    if not args.output_directory.is_dir():
+        os.mkdir(args.output_directory)
+
+    graph_plot.write_image(
+        f"{args.output_directory}/graph-{precision}-{accuracy}-{compiler}-{architecture}.png",
+        format="png",
+    )
+    return
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/libm-benchmarks/plot-tool/plotter.py b/src/libm-benchmarks/plot-tool/plotter.py
new file mode 100644
index 00000000..2ef1cd2f
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/plotter.py
@@ -0,0 +1,164 @@
+import plotly.graph_objects as go
+import pandas as pd
+
+
+def get_legend(variant, arch):
+    if arch == "aarch64":
+        if variant == "vector128":
+            return "AdvSIMD"
+        if variant == "sve":
+            return "SVE 256bit"
+    if arch == "x86":
+        if variant == "vector128":
+            return "SSE"
+        if variant == "vector256":
+            return "AVX2"
+        if variant == "vector512":
+            return "AVX512"
+    return variant
+
+
+# Given a filtered dataframe, extract the data needed from it for the graph we
+# want to build:
+# Information for the x axis: Function and interval data
+# Information for the y axis: Performance values (Total Time and Throughput)
+# The headers are changed so that they contain the metric and the variant they relate to
+# Example: Total Time - sleef scalar
+# This is convenient for the graphing step, where the first part can be used to further filter
+# the dataframe, and the latter part is used as the legend for each plot in the graph.
+def extract_coordinates(filtered_df, extra_legend=""):
+    graph_df = pd.DataFrame({})
+    # x axis
+    graph_df["Fun-Interval"] = filtered_df["Function"] + filtered_df["Interval"]
+    variant = filtered_df.iloc[0]["Variant"]
+    arch = filtered_df.iloc[0]["Architecture"]
+    # y axis
+    legend = (
+        f'{filtered_df.iloc[0]["Library"]} {get_legend(variant, arch)} {extra_legend}'
+    )
+    graph_df["Throughput - " + legend] = filtered_df["Throughput"]
+    graph_df["Total Time - " + legend] = filtered_df["Total Time"]
+    graph_df = graph_df.set_index("Fun-Interval")
+    return graph_df
+
+
+# Given a dataframe with all the information necessary to fill the x and y values,
+# plot a performance graph.
+# The y_col value determines the performance metric shown on the y axis of the graph.
+# It is also used to further filter and process the dataframes.
+def plot_graph_from_coordinates(
+    coordinates_df, y_col="Throughput", graph_title="graph-pr-acc-comp-arch"
+):
+    # y_col can be Throughput, Total Time, Throughput Ratio or Total Time Ratio.
+    # In coordinates_df the values are never ratios, which means the columns only
+    # contain Total Time and Throughput information. We further filter this
+    # dataframe so that it only shows the quantity we need (Total Time or Throughput),
+    # according to the first word of y_col.
+    coordinates_df = coordinates_df.filter(like=y_col.split()[0], axis=1)
+
+    ratio = "ratio" in y_col.lower()
+    speedup = "speedup" in y_col.lower()
+    if ratio:
+        # The program will fail here (as expected) if no reference was provided.
+        # In order to divide all the columns by the reference, we convert the
+        # reference into a series and divide every column of this dataframe by it.
+        # A trick to convert a dataframe column into a series is to transpose the
+        # dataframe and then apply iloc to it.
+        ref_df = coordinates_df.filter(regex="ref").T.iloc[0]
+        coordinates_df = pd.DataFrame(
+            coordinates_df.values / ref_df.values[:, None],
+            index=coordinates_df.index,
+            columns=coordinates_df.columns,
+        )
+    elif speedup:
+        # The program will fail here (as expected) if no reference was provided.
+        # Speedup is the inverse of the ratio above: the reference series divides
+        # into each column (reference / column values), using the same
+        # transpose-and-iloc trick to turn the reference column into a series
+        # before broadcasting the division.
+        ref_df = coordinates_df.filter(regex="ref").T.iloc[0]
+        coordinates_df = pd.DataFrame(
+            ref_df.values[:, None] / coordinates_df.values,
+            index=coordinates_df.index,
+            columns=coordinates_df.columns,
+        )
+
+    # Fix the y axis name by adding units (ratios and speedups are dimensionless)
+    elif "throughput" in y_col.lower():
+        y_col = f"{y_col} (ns/el)"
+    elif "total time" in y_col.lower():
+        y_col = f"{y_col} (ns)"
+
+    x_vector = coordinates_df.index
+    fig = go.Figure()
+    for (columnName, columnData) in coordinates_df.items():
+        # In ratio mode, the reference is just a horizontal line y=1.0.
+        # In the coordinates dataframe, the column headers are expected to state
+        # which metric the column contains (Total Time or Throughput) and which
+        # variant the results belong to (sleef scalar, sleef sve, ...), so they
+        # look like "Throughput - sleef scalar" for example.
+        # The first part was used earlier for the y axis naming.
+        # We use the second part, after the "-", as the legend title (in this
+        # example that would be "sleef scalar").
+        legend = columnName.split("-")[1]
+        if "ref" in columnName and (ratio or speedup):
+            fig.add_trace(
+                go.Scatter(
+                    x=x_vector,
+                    y=[1 for x in x_vector],
+                    name=legend,
+                    line=dict(width=2, dash="dash"),
+                    mode="lines",
+                )
+            )
+            continue
+        fig.add_trace(go.Bar(name=legend, x=x_vector, y=columnData))
+
+    # Configure Title
+    # The graph_title parameter passed to this function should have the
+    # following format: graph-{precision}-{accuracy}-{compiler}-{architecture}-{machine}
+    _, precision, accuracy, _, _, machine = graph_title.split("-")
+    long_acc = {"u10": "1ULP", "u35": "3.5ULP"}[accuracy]
+    fig.update_layout(
+        title=f"Comparison between system libm (GLIBC 2.35) and SLEEF performance for {precision} precision {long_acc} functions on {machine}",
+        barmode="group",
+        xaxis_title="function name and interval",
+        yaxis_title=y_col,
+        legend_title="Variant",
+        width=800,
+        height=600,
+    )
+    return fig
+
+
+# Given an array of filtered dataframes (and optionally a reference dataframe in the
+# same style), merge all of them into a single dataframe with the information necessary
+# to build the graph. This dataframe is indexed by "Function Interval", and each column
+# corresponds to a performance quantity per variant (example: Total Time - scalar).
+def plot_graph(
+    filtered_df_array,
+    ref_df,
+    y_col="Throughput",
+    saving_title="graph-pr-acc-comp-arch",
+):
+    coordinates_df_array = [extract_coordinates(df) for df in filtered_df_array]
+
+    # Use an outer join ("pd.concat") to merge the variant dataframes, as we are
+    # interested in showing performance for all supported functions.
+    # We substitute the resulting NaN values with 0.
+    graph_df = pd.concat(coordinates_df_array, axis=1)
+    if graph_df.isna().any(axis=None):
+        print("Warning: join resulted in NaN values")
+        graph_df = graph_df.fillna(0)
+
+    if not ref_df.empty:
+        # Left join the result dataframe with the reference dataframe, as we are only
+        # interested in comparing performance for the functions present in the result
+        # dataframe; any function that is not present there is not shown in the
+        # output graph.
+        # (No NaN values should be produced)
+        coordinates_df_ref = extract_coordinates(ref_df, extra_legend="(ref)")
+        graph_df = pd.concat([graph_df, coordinates_df_ref], axis=1).reindex(
+            graph_df.index
+        )
+    return plot_graph_from_coordinates(graph_df, y_col=y_col, graph_title=saving_title)
diff --git a/src/libm-benchmarks/plot-tool/preprocessing.py b/src/libm-benchmarks/plot-tool/preprocessing.py
new file mode 100644
index 00000000..75594dcb
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/preprocessing.py
@@ -0,0 +1,69 @@
+import preprocessing_validators as ppv
+import pandas as pd
+
+# Convert raw results into an organized dataframe
+# 1. Drop empty columns
+# 2. Reformulate columns in the dataframe as (example):
+#    Function: sin
+#    Variant: scalar
+#    Precision: double
+#    Interval: [-6.28, 6.28]
+#    ULP: u10
+#    Total Time: 5ns
+#    Throughput: 5ns
+#    Compiler: gcc14
+#    Architecture: aarch64
+#    Library: sleef
+def raw_to_df(results_file):
+    library, architecture, compiler = ppv.filename_validator(results_file)
+    raw_df = pd.read_csv(results_file)
+    raw_df.dropna(how="all", axis=1, inplace=True)
+    intermediate_df = pd.DataFrame({})
+    intermediate_df["Function"] = raw_df["name"].apply(ppv.extract_fun_name)
+    intermediate_df["Variant"] = raw_df["name"].apply(ppv.extract_variant)
+    intermediate_df["Precision"] = raw_df["name"].apply(ppv.extract_precision)
+    intermediate_df["Interval"] = raw_df["name"].apply(ppv.extract_interval)
+    intermediate_df["ULP"] = raw_df["name"].apply(ppv.extract_ulp)
+    intermediate_df["Total Time"] = raw_df["real_time"]
+    intermediate_df["Throughput"] = raw_df["NSperEl"].apply(ppv.seconds_to_nanoseconds)
+    intermediate_df["Compiler"] = compiler
+    intermediate_df["Architecture"] = architecture
+    intermediate_df["Library"] = library
+    return intermediate_df
+
+
+# Filter entries that match a fixed precision, accuracy and variant.
+# If keep_lower_interval is True, it will also keep only one interval per
+# function. If the intervals are sorted in the provided results (recommended),
+# it will keep the lowest interval per function.
+def filter_results(raw_df, precision, accuracy, variant, keep_lower_interval=False):
+    filtered_df = raw_df
+    filtered_df = filtered_df[filtered_df["Precision"] == precision]
+    filtered_df = filtered_df[filtered_df["ULP"] == accuracy]
+    filtered_df = filtered_df[filtered_df["Variant"] == variant]
+    if keep_lower_interval:
+        filtered_df.drop_duplicates(subset="Function", keep="first", inplace=True)
+    # Resetting the index is important, otherwise the rows keep the same
+    # indexes they had in the original dataframe
+    filtered_df.reset_index(drop=True, inplace=True)
+    return filtered_df
+
+
+########################################################
+######################## TESTING #######################
+#              python3 preprocessing.py                #
+########################################################
+if __name__ == "__main__":
+    libm_gcc14_aarch64_df = raw_to_df("results-libm-gcc14-aarch64.csv")
+    sleef_gcc14_aarch64_df = raw_to_df("results-sleef-gcc14-aarch64.csv")
+    print(libm_gcc14_aarch64_df.head(3))
+    print(sleef_gcc14_aarch64_df.head(3))
+    libm_gcc14_aarch64_scalar_double_u10_df = filter_results(
+        libm_gcc14_aarch64_df, precision="double", accuracy="u10", variant="scalar"
+    )
+    print(libm_gcc14_aarch64_scalar_double_u10_df.head(3))
+
+    sleef_gcc14_aarch64_scalar_double_u10_df = filter_results(
+        sleef_gcc14_aarch64_df, precision="double", accuracy="u10", variant="scalar"
+    )
+    print(sleef_gcc14_aarch64_scalar_double_u10_df.head(3))
diff --git a/src/libm-benchmarks/plot-tool/preprocessing_validators.py b/src/libm-benchmarks/plot-tool/preprocessing_validators.py
new file mode 100644
index 00000000..c0cf66f6
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/preprocessing_validators.py
@@ -0,0 +1,160 @@
+import re
+
+# Set of options for which this result preprocessing and graph plotting
+# tool should work.
+compilers = ["gcc14", "llvm17"]
+architectures = ["aarch64", "x86"]
+variants = ["scalar", "vector128", "vector256", "vector512", "sve"]
+precisions = ["single", "double"]
+accuracies = ["u10", "u35"]
+libraries = ["sleef", "libm"]
+
+# Validation Functions
+# Ensure fields in the dataframe belong to the options defined above.
+def valid_compiler(compiler):
+    assert compiler in compilers
+    return compiler
+
+
+def valid_architecture(architecture):
+    assert architecture in architectures
+    return architecture
+
+
+def valid_variant(variant):
+    assert variant in variants
+    return variant
+
+
+def valid_precision(precision):
+    assert precision in precisions
+    return precision
+
+
+def valid_library(library):
+    assert library in libraries
+    return library
+
+
+def valid_accuracy(accuracy):
+    assert accuracy in accuracies
+    return accuracy
+
+
+# This function takes a given results filename and checks that it obeys
+# the naming convention: "results-<library>-<compiler>-<architecture>.csv"
+# It also checks that the options in the name are supported.
+# If it passes the checks, it returns the relevant components of the filename.
+# Example of a valid filename: "results-libm-gcc14-aarch64.csv"
+def filename_validator(result_filename):
+    result_filename = result_filename.split("/")[-1]
+    filename_components = result_filename.split("-")
+    assert len(filename_components) == 4
+    assert filename_components[0] == "results"
+    library = valid_library(filename_components[1])
+    compiler = valid_compiler(filename_components[2])
+    architecture_extension = filename_components[3].split(".")
+    assert len(architecture_extension) == 2
+    architecture = valid_architecture(architecture_extension[0])
+    extension = architecture_extension[1]
+    assert extension == "csv"
+    return library, architecture, compiler
+
+
+# This function takes a benchmark label and extracts the name of the library
+# the function belongs to.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> sleef
+def extract_lib(fun_name):
+    return valid_library(fun_name.split("_")[1].lower())
+
+
+# This function takes a benchmark label and extracts the name of the math function
+# (independent of vector extension, interval, etc.) that the routine exercises.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> tan
+def extract_fun_name(fun_name):
+    # Pass 1: Split by _
+    # MB_Sleef_tandx_u35sve_sved_0_6.28 --> [MB, Sleef, tandx, u35sve, sved, 0, 6.28]
+    # The element with index 2 contains the name we are interested in: tandx
+    formatted_fun_name1 = fun_name.split("_")[2]
+    # Pass 2: Split off the precision/width suffix, usually present in the last few characters
+    # tandx --> [tan, '']
+    # and discard the last element of this list.
+    # We discard the last element instead of keeping the first one in case the
+    # function name itself contains an f or a d.
+    # Also, this could be simplified if splitting from the right with regular
+    # expressions were supported in Python.
+    suffix_reg_exp = r"fx|dx|f\d+|d\d+|f|d"
+    formatted_fun_name2 = re.split(suffix_reg_exp, formatted_fun_name1)
+    if len(formatted_fun_name2) == 1:
+        return formatted_fun_name2[0]
+    else:
+        return "".join(formatted_fun_name2[:-1])
+
+
+# This function takes a benchmark label and extracts the interval used to benchmark
+# the routine in question.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> [0,6.28]
+def extract_interval(fun_name):
+    interval = fun_name.split("_")
+    return "[" + interval[-2] + "," + interval[-1] + "]"
+
+
+# This function takes a benchmark label and extracts the variant (vector extension)
+# of the routine. Should be scalar, sve or vector128/256/512.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> sve
+def extract_variant(fun_name):
+    variant_w_precision = fun_name.split("_")[4]
+    variant = re.split("f|d", variant_w_precision)
+    return valid_variant("".join(variant))
+
+
+# This function takes a benchmark label and determines whether it is a single precision
+# or a double precision routine.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> double
+# This works assuming the variant names themselves contain no f or d. If that changes,
+# use regular expressions instead.
+def extract_precision(fun_name):
+    extension_w_precision = fun_name.split("_")[4]
+    precision = None
+    if "f" in extension_w_precision:
+        precision = "single"
+    if "d" in extension_w_precision:
+        precision = "double"
+    return valid_precision(precision)
+
+
+# This function takes a benchmark label and extracts the accuracy (ULP bound)
+# the routine was benchmarked at.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> u35
+def extract_ulp(fun_name):
+    return valid_accuracy(fun_name.split("_")[3][:3])
+
+
+# This function converts a value in seconds to nanoseconds.
+# It is applied to the NSperEl column when building the organized dataframe.
+# Example 5e-9 -> 5.0
+def seconds_to_nanoseconds(seconds):
+    return seconds * 1e9
+
+
+########################################################
+######################## TESTING #######################
+#         python3 preprocessing_validators.py          #
+########################################################
+if __name__ == "__main__":
+    # Testing of the functions above:
+    print(extract_lib("MB_Sleef_tandx_u35sve_sved_0_6.28"))
+    print(extract_lib("MB_libm_sin_scalard_0_1e28"))
+    print(extract_fun_name("MB_Sleef_tandx_u35sve_sved_0_6.28"))
+    print(extract_interval("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_interval("MB_Sleef_atan2d2_u10_vectord128_-10_10"))
+    print(extract_variant("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_variant("MB_Sleef_log10d2_u10_vectord128_0_1e100"))
+    print(extract_variant("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_ulp("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_ulp("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_fun_name("MB_libm_tan_u10_scalarf_0_1e+6"))
+    print(extract_interval("MB_libm_tan_u10_scalarf_0_1e+6"))
+    print(extract_variant("MB_libm_tan_u10_scalarf_0_1e+6"))
+    print(extract_ulp("MB_libm_tan_u10_scalarf_0_1e+6"))
+    filename_validator("results-libm-gcc14-aarch64.csv")
diff --git a/src/libm-benchmarks/plot-tool/requirements.txt b/src/libm-benchmarks/plot-tool/requirements.txt
new file mode 100644
index 00000000..a911896f
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/requirements.txt
@@ -0,0 +1,4 @@
+pandas
+plotly
+# kaleido is needed by plotly's fig.write_image for static PNG export
+kaleido
\ No newline at end of file
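
Note on the ratio/speedup computation in plotter.py (not part of the patch): the single reference column is turned into a pandas Series (transpose, then iloc[0]) and the division is broadcast across every column, so the reference column itself collapses to 1.0. A minimal, self-contained sketch of that step follows; the numbers are made up and the column names merely follow the "Metric - library variant" scheme produced by extract_coordinates, they are not taken from real benchmark output.

    import pandas as pd

    # Toy coordinates dataframe: two functions on the x axis, one SLEEF column
    # and one reference column (values are invented for illustration only).
    coordinates_df = pd.DataFrame(
        {
            "Throughput - sleef AdvSIMD": [2.0, 4.0],
            "Throughput - libm scalar (ref)": [8.0, 8.0],
        },
        index=["sin[-6.28,6.28]", "cos[-6.28,6.28]"],
    )

    # Turn the reference column into a Series: keep columns matching "ref",
    # transpose, then take the first (and only) row.
    ref_series = coordinates_df.filter(regex="ref").T.iloc[0]

    # Broadcast the division row by row: every column is divided by the
    # reference value for that function, so the ref column becomes all 1.0.
    ratio_df = pd.DataFrame(
        coordinates_df.values / ref_series.values[:, None],
        index=coordinates_df.index,
        columns=coordinates_df.columns,
    )
    print(ratio_df)  # sleef column: 0.25, 0.5 ; ref column: 1.0, 1.0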