add benchmark outputs (#268)
* add output generation to snax benchmarks

* better formatting

* small fixes

* update workflow

* update workflow

* install git lfs

* add viz for plotting

* update workflow

* update workflow

* add workflow

* maybe not force?

* do not persist credentials

* update

* push

* update

* try to use token

* typo

* do not persist again

* try

* final try

* jorenbot

* regular joren

* fuck this shit

* re-enable all sizes

* don't delete everything every new run

* do not run on pr anymore

* add tar.gz to gitattributes

* artifact

* do not checkout branch

* delete lfs stuff

* only one gitignore

* do not run on pr anymore
jorendumoulin authored Oct 2, 2024
1 parent 727b089 commit b8c209a
Showing 11 changed files with 110 additions and 33 deletions.
3 changes: 0 additions & 3 deletions .gitattributes

This file was deleted.

12 changes: 7 additions & 5 deletions .github/workflows/run-benchmarks.yml
@@ -11,16 +11,18 @@ jobs:
container:
image: ghcr.io/kuleuven-micas/snax:v0.2.2
steps:
- uses: actions/checkout@v3
- name: Checkout repository
uses: actions/checkout@v4
- name: Install snax-mlir
run: python3 -m pip install '-e.[dev]'
run: python3 -m pip install '-e.[dev,viz]'
- name: Run benchmarks
run: python3 genbenchmark.py
working-directory: benchmarks/${{ matrix.kernel }}
- uses: actions/upload-artifact@v4
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
name: output_report
path: benchmarks/${{ matrix.kernel }}/output_report.txt
name: ${{ matrix.kernel }}
path: benchmarks/${{ matrix.kernel }}/output
strategy:
matrix:
kernel: [dense_matmul]
7 changes: 7 additions & 0 deletions .gitignore
@@ -1,5 +1,12 @@
# snax stuff
*logs*
*results*
*output*
*generated*
*.png
*.pdf
*.vcd
*.csv

.env
.venv
3 changes: 0 additions & 3 deletions benchmarks/.gitignore

This file was deleted.

10 changes: 8 additions & 2 deletions benchmarks/dense_matmul/Makefile
@@ -30,7 +30,13 @@ data.c data.h:

sim_%: %
rm -fr ./logs/
$(VLTSIM) $<
$(VLTSIM) $< --vcd

sim.csv: sim.vcd
vcd-to-csv

plots: sim.csv
$(PYTHON) ../../util/visualization/gemmx.py --input_file sim.csv --output_path ./

RUN = $(addprefix run_, $(TESTS))
$(RUN): run_%: sim_%
@@ -41,4 +47,4 @@ all: $(TESTS)
allrun: $(RUN)

clean:
rm -fr *.ll12 *.x *.o *.logs/ logs/ data.h data.c
rm -fr *.ll12 *.x *.o *.logs/ logs/ data.h data.c *.png *.pdf *.vcd *.csv
69 changes: 49 additions & 20 deletions benchmarks/dense_matmul/genbenchmark.py
@@ -1,6 +1,7 @@
import itertools
import json
import pathlib
from datetime import datetime
from io import StringIO

from xdsl.builder import ImplicitBuilder
@@ -65,22 +66,57 @@ def write_module_to_file(module, file):
output_file.write(output.getvalue())


def generate_tiled_benchmark(m, n, k) -> SNAXBenchmark:
def generate_dense_benchmark(m, n, k) -> SNAXBenchmark:
module = create_matrix_multiply(k, m, n)
write_module_to_file(module, "generated.mlir")
binary = "generated.x"
bm = SNAXBenchmark(
kernel=f"tiled_matmul_generated_{k}x{n}x{m}",
kernel=f"dense_matmul_generated_{k}x{n}x{m}",
binary=binary,
src_dir=str(pathlib.Path.cwd()),
export_dir=str(pathlib.Path.cwd()),
output_dir=str(pathlib.Path.cwd()),
)
return bm


def output_log() -> str:
result = "# Dense Matmul Benchmark Results\n\n"
dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
result += f"This test was run at {dt_string}\n\n"
result += "| benchmark | cycles | ideal | utilization |\n"
result += "| --- | --- | --- | --- |\n"
avg_utilization = 0
avg_n = 0
for benchmark in output_report:
result += f"| [{benchmark}]({benchmark}) "
result += f"| {output_report[benchmark]['cycles']} "
result += f"| {output_report[benchmark]['ideal']} "
result += f"| {output_report[benchmark]['utilization']} | \n"
avg_utilization += output_report[benchmark]["utilization"]
avg_n += 1
result += "| average | | |"
result += f"{avg_utilization/avg_n} |\n"
return result


def output_log_benchmark(benchmark_name: str, utilization: dict[str, int]) -> str:
result: str = ""
result += f"# results for {benchmark_name}\n\n"
dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
result += f"This test was run at {dt_string}\n\n"
result += f"Utilization: {utilization['utilization']}\n\n"
result += f" ({utilization['ideal']} cycles ideal, {utilization['cycles']} cycles real)\n\n"
result += "[view banking conflicts plot](figures/banking_conflicts.pdf)\n\n"
result += f"[dowload logs and binaries that generated this result]({benchmark_name}_results.tar.gz)\n\n"
result += "![conflicts_bank](figures/nb_of_stalls_per_bank.png)\n\n"
result += "![conflicts_port](figures/nb_of_stalls_per_port.png)\n\n"
return result


if __name__ == "__main__":
"""Runs the gendata.py script with specified arguments."""
selected_dims = [32, 48, 64]
selected_dims = [32]

sizes = list(itertools.product(selected_dims, repeat=3))

@@ -89,7 +125,7 @@ def generate_tiled_benchmark(m, n, k) -> SNAXBenchmark:
for size in sizes:
k, m, n = size
folder = f"test_generated_{k}x{m}x{m}"
bm = generate_tiled_benchmark(k, m, n)
bm = generate_dense_benchmark(k, m, n)
bm.clean()
bm.build(
build_opts=[
@@ -101,9 +137,12 @@ def generate_tiled_benchmark(m, n, k) -> SNAXBenchmark:
)
bm.run()
bm.trace()
bm.plot()
bm.process_traces(folder)
bm.copy_binary(folder)
bm.copy_logs(folder)
bm.copy_plots()
bm.copy_results()

# add to output report
trace = bm.log_dir.joinpath(bm.input_file.format(hart="00000"))
@@ -112,24 +151,14 @@ def generate_tiled_benchmark(m, n, k) -> SNAXBenchmark:
cycles = data[1]["cycles"]
ideal = round((k / 8 + 1) * (m / 8) * (n / 8))
utilization = ideal / cycles
output_report[bm.benchmark] = {
utilization = {
"cycles": cycles,
"ideal": ideal,
"utilization": utilization,
}
output_report[bm.benchmark] = utilization

bm.generate_output_log(lambda name: output_log_benchmark(name, utilization))

with open("output_report.txt", "w") as file:
file.write("benchmark\tcycles\tideal\tutilization\t\n")
avg_utilization = 0
avg_n = 0
for benchmark in output_report:
file.write(f"{benchmark}\t")
file.write(f"{output_report[benchmark]['cycles']}\t")
file.write(f"{output_report[benchmark]['ideal']}\t")
file.write(f"{output_report[benchmark]['utilization']}\t")
file.write("\n")
avg_utilization += output_report[benchmark]["utilization"]
avg_n += 1
file.write("--------------------------\n")
file.write("average\t\t\t")
file.write(f"{avg_utilization/avg_n}\t\n")
with open("output/index.md", "w") as file:
file.write(output_log())
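
The per-benchmark utilization reported in output/index.md follows directly from the ideal-cycle estimate in the hunk above. A minimal sketch of that calculation, assuming the same 8x8 tiling as the diff; the cycle count below is a made-up placeholder rather than a value read from a real trace:

k, m, n = 32, 32, 32
cycles = 100  # hypothetical placeholder; the real value is read from the trace json

# ideal cycle count for a k x m x n matmul on an 8x8 accelerator tile, as in the diff
ideal = round((k / 8 + 1) * (m / 8) * (n / 8))  # 80 for 32x32x32
utilization = ideal / cycles  # 0.8 with the placeholder cycle count

print(f"ideal={ideal} cycles, utilization={utilization}")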
1 change: 1 addition & 0 deletions benchmarks/tiled_matmul/genbenchmark.py
@@ -93,6 +93,7 @@ def generate_tiled_benchmark(m, n, k, tiling_factors) -> SNAXBenchmark:
binary=binary,
src_dir=str(pathlib.Path.cwd()),
export_dir=str(pathlib.Path.cwd()),
output_dir=str(pathlib.Path.cwd()),
)
return bm

1 change: 1 addition & 0 deletions kernels/streamer_matmul/genbenchmark.py
@@ -10,6 +10,7 @@ def run_all(binary: str, folder: str):
binary=binary,
src_dir=str(pathlib.Path.cwd()),
export_dir=str(pathlib.Path.cwd()),
output_dir=str(pathlib.Path.cwd()),
)
bm.clean()
bm.build(build_opts=[])
2 changes: 2 additions & 0 deletions kernels/tiled_add/genbenchmark.py
@@ -15,6 +15,7 @@ def run_all(binary: str, folder: str):
binary=binary,
src_dir=str(pathlib.Path.cwd()),
export_dir=str(pathlib.Path.cwd()),
output_dir=str(pathlib.Path.cwd()),
)
bm.clean()
bm.build(build_opts=[*SIZES])
@@ -31,6 +32,7 @@ def run_all(binary: str, folder: str):
binary=binary,
src_dir=str(pathlib.Path.cwd()),
export_dir=str(pathlib.Path.cwd()),
output_dir=str(pathlib.Path.cwd()),
)
bm.clean()
bm.build(build_opts=[*SIZES, "ACCFGOPT=1"])
1 change: 1 addition & 0 deletions tests/benchmark/test_snax_benchmark.py
@@ -11,6 +11,7 @@ def test_snax_benchmark_runner():
binary="untiled.acc_dialect.x",
src_dir=str(this_file.parent / ".." / ".." / "kernels" / "tiled_add" / ""),
export_dir=str(this_file.parent),
output_dir=str(this_file.parent),
)
bm.clean()
bm.build(build_opts=["ARRAY_SIZE=128", "TILE_SIZE=16", "NO_CHECK=1"])
34 changes: 34 additions & 0 deletions util/snax_benchmark.py
@@ -1,6 +1,7 @@
import pathlib
import shutil
import subprocess
from collections.abc import Callable

from util.tracing.trace_to_perfetto import process_traces

@@ -14,7 +15,10 @@ def __init__(
kernel: str,
binary: str,
src_dir: str,
# export dir: for all results (useful for manual inspection)
export_dir: str,
# output dir: for all benchmark outputs (for docs generation)
output_dir: str,
benchmark: str | None = None,
):
self.kernel = kernel
@@ -26,6 +30,7 @@ def __init__(
else:
self.benchmark = benchmark
self.export_dir = export_dir / pathlib.Path("results") / self.benchmark
self.output_dir = output_dir / pathlib.Path("output") / self.benchmark

def announce(self, string) -> None:
str_len = len(string) + len(self.benchmark) + 5
@@ -50,6 +55,10 @@ def trace(self):
self.announce("Tracing benchmark")
subprocess.run(["make", "traces"], cwd=self.src_dir, check=True)

def plot(self):
self.announce("Generating plots")
subprocess.run(["make", "plots"], cwd=self.src_dir, check=True)

def process_traces(self, folder: str, file=None):
self.announce("Processing Traces")
dst_folder = self.export_dir / pathlib.Path(folder)
@@ -74,6 +83,14 @@ def process_traces(self, folder: str, file=None):
)
output_events.close()

def generate_output_log(self, generator: Callable[[str], str]) -> None:
self.announce("Generating output log")
output_folder = pathlib.Path(self.output_dir)
if not output_folder.exists():
output_folder.mkdir(parents=True)
with open(output_folder / "index.md", "w") as f:
f.write(generator(self.benchmark))

def copy_binary(self, folder: str):
self.announce("Copying binary")
dst_folder = pathlib.Path(self.export_dir / folder)
@@ -86,3 +103,20 @@ def copy_logs(self, folder: str):
shutil.copytree(
src=self.log_dir, dst=self.export_dir / folder, dirs_exist_ok=True
)

def copy_plots(self):
self.announce("Copying plots to output folder")
plot_filenames = tuple(self.src_dir.glob("*.png"))
plot_filenames += tuple(self.src_dir.glob("*.pdf"))
output_folder = pathlib.Path(self.output_dir) / "figures"
if not output_folder.exists():
output_folder.mkdir(parents=True)
for plot in plot_filenames:
shutil.copy(src=plot, dst=output_folder)

def copy_results(self):
self.announce("Copying results to output folder")
archive_path = shutil.make_archive(
self.benchmark + "_results", "gztar", self.export_dir
)
shutil.move(archive_path, self.output_dir)
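
Taken together, the new output_dir argument and the plot(), copy_plots(), copy_results(), and generate_output_log() methods let a benchmark driver publish a self-contained output/<benchmark> folder for docs generation. A minimal usage sketch, modelled on the dense_matmul driver above; the kernel name, build options, and report text are illustrative only, not part of this commit:

import pathlib

from util.snax_benchmark import SNAXBenchmark

bm = SNAXBenchmark(
    kernel="dense_matmul_generated_32x32x32",  # illustrative kernel name
    binary="generated.x",
    src_dir=str(pathlib.Path.cwd()),
    export_dir=str(pathlib.Path.cwd()),
    output_dir=str(pathlib.Path.cwd()),  # new: output/<benchmark> is created under this path
)
bm.clean()
bm.build(build_opts=[])  # build options depend on the kernel's Makefile
bm.run()
bm.trace()
bm.plot()          # new: runs `make plots` in src_dir
bm.copy_plots()    # new: copies *.png / *.pdf plots into output/<benchmark>/figures
bm.copy_results()  # new: archives the export dir as <benchmark>_results.tar.gz in output/<benchmark>
bm.generate_output_log(lambda name: f"# results for {name}\n")  # new: writes output/<benchmark>/index.md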
