diff --git a/src/libm-benchmarks/plot-tool/plot_results.py b/src/libm-benchmarks/plot-tool/plot_results.py
new file mode 100755
index 00000000..c4f0d5d6
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/plot_results.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# Python Library Imports
+from argparse import ArgumentParser
+from pathlib import Path
+import pandas as pd
+import os
+
+# Auxiliary File Imports
+import preprocessing_validators as ppv
+import preprocessing as pp
+import plotter as pl
+
+# This function orchestrates everything, from data processing to graph plotting.
+# It is organized as follows:
+# 1. Define command line arguments
+# 2. Validate command line arguments
+# 3. Processing (Stage 1): Convert raw results into an organized dataframe
+# 4. Processing (Stage 2): Filter results
+# 5. Generate graph plot over filtered results
+# 6. Save image
+# Example command line invocation:
+# ./src/libm-benchmarks/plot-tool/plot_results.py
+# --result-file
+# --reference-file
+# -v scalar vector128 sve -p double -a u35 -d -y "Throughput Ratio" -o graphs
+
+
+def main():
+    # Define command line arguments
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--result-file",
+        type=Path,
+        required=True,
+        help="File with benchmark results (csv format)",
+    )
+    parser.add_argument(
+        "--reference-file",
+        type=Path,
+        required=False,
+        help="File with reference benchmark results (csv format)",
+    )
+    parser.add_argument(
+        "-y",
+        "--y-axis",
+        choices=[
+            "Total Time",
+            "Total Time Ratio",
+            "Throughput",
+            "Throughput Ratio",
+            "Throughput Speedup",
+        ],
+        default="Throughput",
+        help="Quantity tracked by the y axis",
+    )
+    parser.add_argument(
+        "-v",
+        "--variant",
+        nargs="+",
+        choices=["scalar", "vector128", "vector256", "vector512", "sve"],
+        required=True,
+        help="Which variant to plot",
+    )
+    parser.add_argument(
+        "-m",
+        "--machine",
+        required=True,
+        help="Machine the benchmarks were run on",
+    )
+    parser.add_argument(
+        "-p",
+        "--precision",
+        choices=["double", "single"],
+        required=True,
+        help="Which precision to plot",
+    )
+    parser.add_argument(
+        "-a",
+        "--accuracy",
+        choices=["u10", "u35"],
+        required=True,
+        help="Which accuracy to plot",
+    )
+    parser.add_argument(
+        "-d",
+        "--drop-intervals",
+        action="store_true",
+        help="Keep only one interval per function (if intervals are sorted, the lowest interval is kept)",
+    )
+    parser.add_argument(
+        "-o",
+        "--output-directory",
+        type=Path,
+        default=Path("."),
+        help="Directory to save output (default: current directory)",
+    )
+    args = parser.parse_args()
+
+    # Validate command line arguments
+    results_filename = str(args.result_file)
+    ref_results_filename = str(args.reference_file)
+    library, architecture, compiler = ppv.filename_validator(results_filename)
+
+    # Convert raw results into an organized dataframe
+    sleef_df_raw = pp.raw_to_df(results_filename)
+    precision = ppv.valid_precision(args.precision)
+    accuracy = ppv.valid_accuracy(args.accuracy)
+
+    # Filter results by variant, precision, accuracy
+    # One dataframe per variant
+    filtered_dfs = []
+    for v in args.variant:
+        variant = ppv.valid_variant(v)
+        filtered_df = pp.filter_results(
+            sleef_df_raw,
+            precision=precision,
+            accuracy=accuracy,
+            variant=variant,
+            keep_lower_interval=args.drop_intervals,
+        )
+        filtered_dfs.append(filtered_df)
+
+    # If a reference is provided, repeat a similar process
+    ref_filtered_df = pd.DataFrame({"A": []})
+    if args.reference_file:
+        library_ref, architecture_ref, compiler_ref = ppv.filename_validator(
+            ref_results_filename
+        )
+        assert (
+            architecture == architecture_ref
+            and compiler == compiler_ref
+            and library != library_ref
+        )
+        # Convert raw reference results into an organized dataframe
+        ref_df_raw = pp.raw_to_df(ref_results_filename)
+        # Filter results by variant, precision, accuracy
+        # Note: for now we fix u10 scalar routines in the reference library (i.e. libm) for comparison.
+        ref_filtered_df = pp.filter_results(
+            ref_df_raw,
+            precision=precision,
+            accuracy="u10",
+            variant="scalar",
+            keep_lower_interval=args.drop_intervals,
+        )
+
+    # Plot results
+    graph_plot = pl.plot_graph(
+        filtered_dfs,
+        ref_df=ref_filtered_df,
+        y_col=args.y_axis,
+        saving_title=f"graph-{precision}-{accuracy}-{compiler}-{architecture}-{args.machine}",
+    )
+
+    if not args.output_directory.is_dir():
+        os.mkdir(args.output_directory)
+
+    graph_plot.write_image(
+        f"{args.output_directory}/graph-{precision}-{accuracy}-{compiler}-{architecture}.png",
+        format="png",
+    )
+    return
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/libm-benchmarks/plot-tool/plotter.py b/src/libm-benchmarks/plot-tool/plotter.py
new file mode 100644
index 00000000..2ef1cd2f
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/plotter.py
@@ -0,0 +1,164 @@
+import plotly.graph_objects as go
+import pandas as pd
+
+
+def get_legend(variant, arch):
+    if arch == "aarch64":
+        if variant == "vector128":
+            return "AdvSIMD"
+        if variant == "sve":
+            return "SVE 256bit"
+    if arch == "x86":
+        if variant == "vector128":
+            return "SSE"
+        if variant == "vector256":
+            return "AVX2"
+        if variant == "vector512":
+            return "AVX512"
+    return variant
+
+
+# Given a filtered dataframe, extract the data needed from it for the graph we
+# want to build:
+# Information for the x axis: Function and interval data
+# Information for the y axis: Performance values (Total Time and Throughput)
+# The headers are changed so that they contain the metric and the variant they relate to
+# Example: Total Time - sleef scalar
+# This is convenient for the graphing step, where the first part can be used to further filter
+# the dataframe, and the latter part is used as the legend for each plot in the graph.
+def extract_coordinates(filtered_df, extra_legend=""):
+    graph_df = pd.DataFrame({})
+    # x axis
+    graph_df["Fun-Interval"] = filtered_df["Function"] + filtered_df["Interval"]
+    variant = filtered_df.iloc[0]["Variant"]
+    arch = filtered_df.iloc[0]["Architecture"]
+    # y axis
+    legend = (
+        f'{filtered_df.iloc[0]["Library"]} {get_legend(variant, arch)} {extra_legend}'
+    )
+    graph_df["Throughput - " + legend] = filtered_df["Throughput"]
+    graph_df["Total Time - " + legend] = filtered_df["Total Time"]
+    graph_df = graph_df.set_index("Fun-Interval")
+    return graph_df
+
+
+# Given a dataframe with all the information necessary to fill the x and y values,
+# plot a performance graph.
+# The y_col value determines the performance metric shown on the y axis of the graph.
+# It is also used to further filter and process the dataframes.
+def plot_graph_from_coordinates(
+    coordinates_df, y_col="Throughput", graph_title="graph-pr-acc-comp-arch"
+):
+    # y_col can be Throughput, Total Time, Throughput Ratio or Total Time Ratio.
+    # In coordinates_df the values are never ratios, which means the columns only
+    # contain Total Time and Throughput information. We further filter this
+    # dataframe so that it only shows the quantity we need (Total Time or Throughput),
+    # according to the first word of y_col.
+    coordinates_df = coordinates_df.filter(like=y_col.split()[0], axis=1)
+
+    ratio = "ratio" in y_col.lower()
+    speedup = "speedup" in y_col.lower()
+    if ratio:
+        # The program will fail here (as expected) if no reference was provided.
+        # In order to divide all the columns by the reference, we convert the
+        # reference into a series and divide every column of this dataframe by it.
+        # A trick to convert a dataframe column into a series is to transpose the
+        # dataframe and then apply iloc to it.
+        ref_df = coordinates_df.filter(regex="ref").T.iloc[0]
+        coordinates_df = pd.DataFrame(
+            coordinates_df.values / ref_df.values[:, None],
+            index=coordinates_df.index,
+            columns=coordinates_df.columns,
+        )
+    elif speedup:
+        # The program will fail here (as expected) if no reference was provided.
+        # Speedup is the inverse of the ratio above: the reference series divides
+        # into each column (reference / column values), using the same
+        # transpose-and-iloc trick to turn the reference column into a series
+        # before broadcasting the division.
+        ref_df = coordinates_df.filter(regex="ref").T.iloc[0]
+        coordinates_df = pd.DataFrame(
+            ref_df.values[:, None] / coordinates_df.values,
+            index=coordinates_df.index,
+            columns=coordinates_df.columns,
+        )
+
+    # Fix the y axis name by adding units (ratios and speedups are dimensionless)
+    elif "throughput" in y_col.lower():
+        y_col = f"{y_col} (ns/el)"
+    elif "total time" in y_col.lower():
+        y_col = f"{y_col} (ns)"
+
+    x_vector = coordinates_df.index
+    fig = go.Figure()
+    for (columnName, columnData) in coordinates_df.items():
+        # In ratio mode, the reference is just a horizontal line y=1.0.
+        # In the coordinates dataframe, the column headers are expected to state
+        # which metric the column contains (Total Time or Throughput) and which
+        # variant the results belong to (sleef scalar, sleef sve, ...), so they
+        # look like "Throughput - sleef scalar" for example.
+        # The first part was used earlier for the y axis naming.
+        # We use the second part, after the "-", as the legend title (in this
+        # example that would be "sleef scalar").
+        legend = columnName.split("-")[1]
+        if "ref" in columnName and (ratio or speedup):
+            fig.add_trace(
+                go.Scatter(
+                    x=x_vector,
+                    y=[1 for x in x_vector],
+                    name=legend,
+                    line=dict(width=2, dash="dash"),
+                    mode="lines",
+                )
+            )
+            continue
+        fig.add_trace(go.Bar(name=legend, x=x_vector, y=columnData))
+
+    # Configure Title
+    # The graph_title parameter passed to this function should have the
+    # following format: graph-{precision}-{accuracy}-{compiler}-{architecture}-{machine}
+    _, precision, accuracy, _, _, machine = graph_title.split("-")
+    long_acc = {"u10": "1ULP", "u35": "3.5ULP"}[accuracy]
+    fig.update_layout(
+        title=f"Comparison between system libm (GLIBC 2.35) and SLEEF performance for {precision} precision {long_acc} functions on {machine}",
+        barmode="group",
+        xaxis_title="function name and interval",
+        yaxis_title=y_col,
+        legend_title="Variant",
+        width=800,
+        height=600,
+    )
+    return fig
+
+
+# Given an array of filtered dataframes (and optionally a reference dataframe in the
+# same style), merge all of them into a single dataframe with the information necessary
+# to build the graph. This dataframe is indexed by "Function Interval", and each column
+# corresponds to a performance quantity per variant (example: Total Time - scalar).
+def plot_graph(
+    filtered_df_array,
+    ref_df,
+    y_col="Throughput",
+    saving_title="graph-pr-acc-comp-arch",
+):
+    coordinates_df_array = [extract_coordinates(df) for df in filtered_df_array]
+
+    # Use an outer join ("pd.concat") to merge the variant dataframes, as we are
+    # interested in showing performance for all supported functions.
+    # We substitute the resulting NaN values with 0.
+    graph_df = pd.concat(coordinates_df_array, axis=1)
+    if graph_df.isna().any(axis=None):
+        print("Warning: join resulted in NaN values")
+        graph_df = graph_df.fillna(0)
+
+    if not ref_df.empty:
+        # Left join the result dataframe with the reference dataframe, as we are only
+        # interested in comparing performance for the functions present in the result
+        # dataframe; any function that is not present there is not shown in the
+        # output graph.
+        # (No NaN values should be produced)
+        coordinates_df_ref = extract_coordinates(ref_df, extra_legend="(ref)")
+        graph_df = pd.concat([graph_df, coordinates_df_ref], axis=1).reindex(
+            graph_df.index
+        )
+    return plot_graph_from_coordinates(graph_df, y_col=y_col, graph_title=saving_title)
diff --git a/src/libm-benchmarks/plot-tool/preprocessing.py b/src/libm-benchmarks/plot-tool/preprocessing.py
new file mode 100644
index 00000000..75594dcb
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/preprocessing.py
@@ -0,0 +1,69 @@
+import preprocessing_validators as ppv
+import pandas as pd
+
+# Convert raw results into an organized dataframe
+# 1. Drop empty columns
+# 2. Reformulate columns in the dataframe as (example):
+#    Function: sin
+#    Variant: scalar
+#    Precision: double
+#    Interval: [-6.28, 6.28]
+#    ULP: u10
+#    Total Time: 5ns
+#    Throughput: 5ns
+#    Compiler: gcc14
+#    Architecture: aarch64
+#    Library: sleef
+def raw_to_df(results_file):
+    library, architecture, compiler = ppv.filename_validator(results_file)
+    raw_df = pd.read_csv(results_file)
+    raw_df.dropna(how="all", axis=1, inplace=True)
+    intermediate_df = pd.DataFrame({})
+    intermediate_df["Function"] = raw_df["name"].apply(ppv.extract_fun_name)
+    intermediate_df["Variant"] = raw_df["name"].apply(ppv.extract_variant)
+    intermediate_df["Precision"] = raw_df["name"].apply(ppv.extract_precision)
+    intermediate_df["Interval"] = raw_df["name"].apply(ppv.extract_interval)
+    intermediate_df["ULP"] = raw_df["name"].apply(ppv.extract_ulp)
+    intermediate_df["Total Time"] = raw_df["real_time"]
+    intermediate_df["Throughput"] = raw_df["NSperEl"].apply(ppv.seconds_to_nanoseconds)
+    intermediate_df["Compiler"] = compiler
+    intermediate_df["Architecture"] = architecture
+    intermediate_df["Library"] = library
+    return intermediate_df
+
+
+# Filter entries that match a fixed precision, accuracy and variant.
+# If keep_lower_interval is True, it will also keep only one interval per
+# function. If the intervals are sorted in the provided results (recommended),
+# it will keep the lowest interval per function.
+def filter_results(raw_df, precision, accuracy, variant, keep_lower_interval=False):
+    filtered_df = raw_df
+    filtered_df = filtered_df[filtered_df["Precision"] == precision]
+    filtered_df = filtered_df[filtered_df["ULP"] == accuracy]
+    filtered_df = filtered_df[filtered_df["Variant"] == variant]
+    if keep_lower_interval:
+        filtered_df.drop_duplicates(subset="Function", keep="first", inplace=True)
+    # Resetting the index is important, otherwise the rows keep the same
+    # indexes they had in the original dataframe
+    filtered_df.reset_index(drop=True, inplace=True)
+    return filtered_df
+
+
+########################################################
+######################## TESTING #######################
+#              python3 preprocessing.py                #
+########################################################
+if __name__ == "__main__":
+    libm_gcc14_aarch64_df = raw_to_df("results-libm-gcc14-aarch64.csv")
+    sleef_gcc14_aarch64_df = raw_to_df("results-sleef-gcc14-aarch64.csv")
+    print(libm_gcc14_aarch64_df.head(3))
+    print(sleef_gcc14_aarch64_df.head(3))
+    libm_gcc14_aarch64_scalar_double_u10_df = filter_results(
+        libm_gcc14_aarch64_df, precision="double", accuracy="u10", variant="scalar"
+    )
+    print(libm_gcc14_aarch64_scalar_double_u10_df.head(3))
+
+    sleef_gcc14_aarch64_scalar_double_u10_df = filter_results(
+        sleef_gcc14_aarch64_df, precision="double", accuracy="u10", variant="scalar"
+    )
+    print(sleef_gcc14_aarch64_scalar_double_u10_df.head(3))
diff --git a/src/libm-benchmarks/plot-tool/preprocessing_validators.py b/src/libm-benchmarks/plot-tool/preprocessing_validators.py
new file mode 100644
index 00000000..c0cf66f6
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/preprocessing_validators.py
@@ -0,0 +1,160 @@
+import re
+
+# Set of options for which this result preprocessing and graph plotting
+# tool should work.
+compilers = ["gcc14", "llvm17"]
+architectures = ["aarch64", "x86"]
+variants = ["scalar", "vector128", "vector256", "vector512", "sve"]
+precisions = ["single", "double"]
+accuracies = ["u10", "u35"]
+libraries = ["sleef", "libm"]
+
+# Validation Functions
+# Ensure fields in the dataframe belong to the options defined above.
+def valid_compiler(compiler):
+    assert compiler in compilers
+    return compiler
+
+
+def valid_architecture(architecture):
+    assert architecture in architectures
+    return architecture
+
+
+def valid_variant(variant):
+    assert variant in variants
+    return variant
+
+
+def valid_precision(precision):
+    assert precision in precisions
+    return precision
+
+
+def valid_library(library):
+    assert library in libraries
+    return library
+
+
+def valid_accuracy(accuracy):
+    assert accuracy in accuracies
+    return accuracy
+
+
+# This function takes a given results filename and checks that it obeys
+# the naming convention: "results-<library>-<compiler>-<architecture>.csv"
+# It also checks that the options in the name are supported.
+# If it passes the checks, it returns the relevant components of the filename.
+# Example of a valid filename: "results-libm-gcc14-aarch64.csv"
+def filename_validator(result_filename):
+    result_filename = result_filename.split("/")[-1]
+    filename_components = result_filename.split("-")
+    assert len(filename_components) == 4
+    assert filename_components[0] == "results"
+    library = valid_library(filename_components[1])
+    compiler = valid_compiler(filename_components[2])
+    architecture_extension = filename_components[3].split(".")
+    assert len(architecture_extension) == 2
+    architecture = valid_architecture(architecture_extension[0])
+    extension = architecture_extension[1]
+    assert extension == "csv"
+    return library, architecture, compiler
+
+
+# This function takes a benchmark label and extracts the name of the library
+# the function belongs to.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> sleef
+def extract_lib(fun_name):
+    return valid_library(fun_name.split("_")[1].lower())
+
+
+# This function takes a benchmark label and extracts the name of the math function
+# (independent of vector extension, interval, etc.) that the routine exercises.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> tan
+def extract_fun_name(fun_name):
+    # Pass 1: Split by _
+    # MB_Sleef_tandx_u35sve_sved_0_6.28 --> [MB, Sleef, tandx, u35sve, sved, 0, 6.28]
+    # The element with index 2 contains the name we are interested in: tandx
+    formatted_fun_name1 = fun_name.split("_")[2]
+    # Pass 2: Split off the precision/width suffix, usually present in the last few characters
+    # tandx --> [tan, '']
+    # and discard the last element of this list.
+    # We discard the last element instead of keeping the first one in case the
+    # function name itself contains an f or a d.
+    # Also, this could be simplified if splitting from the right with regular
+    # expressions were supported in Python.
+    suffix_reg_exp = r"fx|dx|f\d+|d\d+|f|d"
+    formatted_fun_name2 = re.split(suffix_reg_exp, formatted_fun_name1)
+    if len(formatted_fun_name2) == 1:
+        return formatted_fun_name2[0]
+    else:
+        return "".join(formatted_fun_name2[:-1])
+
+
+# This function takes a benchmark label and extracts the interval used to benchmark
+# the routine in question.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> [0,6.28]
+def extract_interval(fun_name):
+    interval = fun_name.split("_")
+    return "[" + interval[-2] + "," + interval[-1] + "]"
+
+
+# This function takes a benchmark label and extracts the variant (vector extension)
+# of the routine. Should be scalar, sve or vector128/256/512.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> sve
+def extract_variant(fun_name):
+    variant_w_precision = fun_name.split("_")[4]
+    variant = re.split("f|d", variant_w_precision)
+    return valid_variant("".join(variant))
+
+
+# This function takes a benchmark label and determines whether it is a single precision
+# or a double precision routine.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> double
+# This works assuming the variant names themselves contain no f or d. If that changes,
+# use regular expressions instead.
+def extract_precision(fun_name):
+    extension_w_precision = fun_name.split("_")[4]
+    precision = None
+    if "f" in extension_w_precision:
+        precision = "single"
+    if "d" in extension_w_precision:
+        precision = "double"
+    return valid_precision(precision)
+
+
+# This function takes a benchmark label and extracts the accuracy (ULP bound)
+# the routine was benchmarked at.
+# Example "MB_Sleef_tandx_u35sve_sved_0_6.28" -> u35
+def extract_ulp(fun_name):
+    return valid_accuracy(fun_name.split("_")[3][:3])
+
+
+# This function converts a value in seconds to nanoseconds.
+# It is applied to the NSperEl column when building the organized dataframe.
+# Example 5e-9 -> 5.0
+def seconds_to_nanoseconds(seconds):
+    return seconds * 1e9
+
+
+########################################################
+######################## TESTING #######################
+#         python3 preprocessing_validators.py          #
+########################################################
+if __name__ == "__main__":
+    # Testing of the functions above:
+    print(extract_lib("MB_Sleef_tandx_u35sve_sved_0_6.28"))
+    print(extract_lib("MB_libm_sin_scalard_0_1e28"))
+    print(extract_fun_name("MB_Sleef_tandx_u35sve_sved_0_6.28"))
+    print(extract_interval("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_interval("MB_Sleef_atan2d2_u10_vectord128_-10_10"))
+    print(extract_variant("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_variant("MB_Sleef_log10d2_u10_vectord128_0_1e100"))
+    print(extract_variant("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_ulp("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_ulp("MB_Sleef_tandx_u35sve_sved_0_1e+38"))
+    print(extract_fun_name("MB_libm_tan_u10_scalarf_0_1e+6"))
+    print(extract_interval("MB_libm_tan_u10_scalarf_0_1e+6"))
+    print(extract_variant("MB_libm_tan_u10_scalarf_0_1e+6"))
+    print(extract_ulp("MB_libm_tan_u10_scalarf_0_1e+6"))
+    filename_validator("results-libm-gcc14-aarch64.csv")
diff --git a/src/libm-benchmarks/plot-tool/requirements.txt b/src/libm-benchmarks/plot-tool/requirements.txt
new file mode 100644
index 00000000..a911896f
--- /dev/null
+++ b/src/libm-benchmarks/plot-tool/requirements.txt
@@ -0,0 +1,4 @@
+pandas
+plotly
+# kaleido is needed by plotly's fig.write_image for static PNG export
+kaleido
\ No newline at end of file
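
Note on the ratio/speedup computation in plotter.py (not part of the patch): the single reference column is turned into a pandas Series (transpose, then iloc[0]) and the division is broadcast across every column, so the reference column itself collapses to 1.0. A minimal, self-contained sketch of that step follows; the numbers are made up and the column names merely follow the "Metric - library variant" scheme produced by extract_coordinates, they are not taken from real benchmark output.

    import pandas as pd

    # Toy coordinates dataframe: two functions on the x axis, one SLEEF column
    # and one reference column (values are invented for illustration only).
    coordinates_df = pd.DataFrame(
        {
            "Throughput - sleef AdvSIMD": [2.0, 4.0],
            "Throughput - libm scalar (ref)": [8.0, 8.0],
        },
        index=["sin[-6.28,6.28]", "cos[-6.28,6.28]"],
    )

    # Turn the reference column into a Series: keep columns matching "ref",
    # transpose, then take the first (and only) row.
    ref_series = coordinates_df.filter(regex="ref").T.iloc[0]

    # Broadcast the division row by row: every column is divided by the
    # reference value for that function, so the ref column becomes all 1.0.
    ratio_df = pd.DataFrame(
        coordinates_df.values / ref_series.values[:, None],
        index=coordinates_df.index,
        columns=coordinates_df.columns,
    )
    print(ratio_df)  # sleef column: 0.25, 0.5 ; ref column: 1.0, 1.0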