From 779fd5b4016593a1a78cdbf1438bb0e28d1608d9 Mon Sep 17 00:00:00 2001
From: Joren Dumoulin
Date: Thu, 5 Dec 2024 14:05:56 +0100
Subject: [PATCH 1/3] further changes

---
 benchmarks/dense_matmul/genbenchmark.py | 6 +++---
 benchmarks/tiled_matmul/genbenchmark.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/benchmarks/dense_matmul/genbenchmark.py b/benchmarks/dense_matmul/genbenchmark.py
index 5bea33d3..e6b6a4df 100644
--- a/benchmarks/dense_matmul/genbenchmark.py
+++ b/benchmarks/dense_matmul/genbenchmark.py
@@ -44,7 +44,7 @@ def create_matrix_multiply(m, n, k, add_c: bool = False):
 
     @Builder.implicit_region(arg_types)
     def func_body(args: tuple[BlockArgument, ...]) -> None:
-        c0 = arith.Constant.from_int_and_width(0, 32)
+        c0 = arith.ConstantOp.from_int_and_width(0, 32)
         empty_tensor = tensor.EmptyOp([], (arg_types[-1]))
         result = linalg.QuantizedMatmulOp(
             [args[0], args[1], c0.result, c0.result], [empty_tensor.tensor]
@@ -54,9 +54,9 @@ def func_body(args: tuple[BlockArgument, ...]) -> None:
             newresult = linalg.AddOp(
                 [args[2], result.results[0]], [empty_tensor_2.tensor]
             )
-            func.Return(newresult)
+            func.ReturnOp(newresult)
         else:
-            func.Return(result)
+            func.ReturnOp(result)
 
     function = func.FuncOp.from_region(
         "streamer_matmul", arg_types, res_types, func_body
diff --git a/benchmarks/tiled_matmul/genbenchmark.py b/benchmarks/tiled_matmul/genbenchmark.py
index 544cfd77..dc40aaf2 100644
--- a/benchmarks/tiled_matmul/genbenchmark.py
+++ b/benchmarks/tiled_matmul/genbenchmark.py
@@ -47,9 +47,9 @@ def get_2d_memref_type(typ, dim_one, dim_two, transpose=False):
 
     b = Block(arg_types=(input_types))
     with ImplicitBuilder(b) as (arg0, arg1, arg2):
-        c0 = arith.Constant.from_int_and_width(0, 32)
+        c0 = arith.ConstantOp.from_int_and_width(0, 32)
         linalg.QuantizedMatmulOp([arg0, arg1, c0.result, c0.result], [arg2])
-        func.Return()
+        func.ReturnOp()
 
     region = Region(b)
 

From dfba4836b8d3cf2a3fdd3f8772c2faa4cdbc9649 Mon Sep 17 00:00:00 2001
From: Joren Dumoulin
Date: Thu, 5 Dec 2024 15:07:04 +0100
Subject: [PATCH 2/3] different nb_cores

---
 benchmarks/dense_matmul/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/dense_matmul/Makefile b/benchmarks/dense_matmul/Makefile
index ada940b3..e38d7799 100644
--- a/benchmarks/dense_matmul/Makefile
+++ b/benchmarks/dense_matmul/Makefile
@@ -20,7 +20,7 @@ else
 REMOVE_MEMREF_COPY=
 endif
 
-SNAXOPTFLAGS = -p convert-linalg-to-kernel,insert-accfg-op{accelerator=snax_gemmx},dispatch-kernels,convert-linalg-to-stream,fuse-streaming-regions,stream-bufferize,snax-bufferize,alloc-to-global,set-memory-space,set-memory-layout{gemm_layout=${LAYOUT}},realize-memref-casts,${REMOVE_MEMREF_COPY}insert-sync-barrier,dispatch-regions{nb_cores=3},convert-stream-to-snax-stream,convert-linalg-to-accfg,test-add-mcycle-around-launch,convert-accfg-to-csr,snax-copy-to-dma,memref-to-snax,snax-to-func,snax-lower-mcycle,clear-memory-space
+SNAXOPTFLAGS = -p convert-linalg-to-kernel,insert-accfg-op{accelerator=snax_gemmx},dispatch-kernels,convert-linalg-to-stream,fuse-streaming-regions,stream-bufferize,snax-bufferize,alloc-to-global,set-memory-space,set-memory-layout{gemm_layout=${LAYOUT}},realize-memref-casts,${REMOVE_MEMREF_COPY}insert-sync-barrier,dispatch-regions{nb_cores=2},convert-stream-to-snax-stream,convert-linalg-to-accfg,test-add-mcycle-around-launch,convert-accfg-to-csr,snax-copy-to-dma,memref-to-snax,snax-to-func,snax-lower-mcycle,clear-memory-space
 
 GEN_DATA_OPTS += --m=${SIZE_M}
 GEN_DATA_OPTS += --n=${SIZE_N}

From 415477983de3233ede18516f93751fb205d39228 Mon Sep 17 00:00:00 2001
From: Joren Dumoulin
Date: Thu, 5 Dec 2024 15:52:15 +0100
Subject: [PATCH 3/3] remove too large sizes

---
 benchmarks/dense_matmul/genbenchmark.py | 58 -------------------------
 1 file changed, 58 deletions(-)

diff --git a/benchmarks/dense_matmul/genbenchmark.py b/benchmarks/dense_matmul/genbenchmark.py
index e6b6a4df..47e5f676 100644
--- a/benchmarks/dense_matmul/genbenchmark.py
+++ b/benchmarks/dense_matmul/genbenchmark.py
@@ -147,64 +147,6 @@ def output_log_benchmark(
 
     sizes = list(itertools.product(selected_dims, repeat=3))
 
-    # some other relevant neural network sizes:
-    nn_size = [
-        # m, n, k
-        # tiled small matrix sizes from LSTM
-        [16, 32, 512],
-        # tiled small matrix sizes from MobileNetV2
-        [448, 32, 32],
-        [8, 192, 32],
-        [8, 16, 16],
-        [224, 16, 192],
-        [8, 96, 16],
-        [64, 24, 96],
-        [8, 48, 24],
-        [56, 48, 16],
-        [8, 32, 144],
-        [56, 32, 32],
-        [200, 48, 16],
-        [200, 32, 64],
-        [200, 96, 16],
-        [200, 8, 384],
-        [200, 8, 96],
-        [56, 576, 16],
-        [8, 160, 576],
-        [56, 48, 160],
-        [8, 960, 16],
-        [56, 64, 960],
-        [56, 64, 320],
-        [8, 40, 1280],
-        # tiled small matrix sizes from ResNet18
-        [8, 32, 152],
-        [8, 64, 576],
-        [8, 128, 576],
-        [112, 128, 128],
-        [56, 32, 64],
-        [40, 64, 1152],
-        [200, 64, 192],
-        [200, 32, 128],
-        [56, 8, 576],
-        [56, 8, 512],
-        [56, 128, 256],
-        [8, 200, 512],
-        # tiled small matrix sizes from Vision-Transformer
-        [40, 96, 768],
-        [40, 200, 64],
-        [200, 64, 200],
-        [40, 8, 768],
-        [8, 128, 192],
-        [8, 40, 768],
-        # tiled small matrix sizes from I-BERTBase
-        [32, 64, 768],
-        [8, 512, 64],
-        [32, 64, 512],
-        [128, 8, 768],
-        [128, 8, 792],
-        [128, 88, 192],
-    ]
-    sizes += nn_size
-
     output_report: dict[str, dict] = {}
 
     for size, layout, add_c in itertools.product(