From 779fd5b4016593a1a78cdbf1438bb0e28d1608d9 Mon Sep 17 00:00:00 2001
From: Joren Dumoulin
Date: Thu, 5 Dec 2024 14:05:56 +0100
Subject: [PATCH 1/3] further changes

---
 benchmarks/dense_matmul/genbenchmark.py | 6 +++---
 benchmarks/tiled_matmul/genbenchmark.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/benchmarks/dense_matmul/genbenchmark.py b/benchmarks/dense_matmul/genbenchmark.py
index 5bea33d3..e6b6a4df 100644
--- a/benchmarks/dense_matmul/genbenchmark.py
+++ b/benchmarks/dense_matmul/genbenchmark.py
@@ -44,7 +44,7 @@ def create_matrix_multiply(m, n, k, add_c: bool = False):
 
     @Builder.implicit_region(arg_types)
     def func_body(args: tuple[BlockArgument, ...]) -> None:
-        c0 = arith.Constant.from_int_and_width(0, 32)
+        c0 = arith.ConstantOp.from_int_and_width(0, 32)
         empty_tensor = tensor.EmptyOp([], (arg_types[-1]))
         result = linalg.QuantizedMatmulOp(
             [args[0], args[1], c0.result, c0.result], [empty_tensor.tensor]
@@ -54,9 +54,9 @@ def func_body(args: tuple[BlockArgument, ...]) -> None:
             newresult = linalg.AddOp(
                 [args[2], result.results[0]], [empty_tensor_2.tensor]
             )
-            func.Return(newresult)
+            func.ReturnOp(newresult)
         else:
-            func.Return(result)
+            func.ReturnOp(result)
 
     function = func.FuncOp.from_region(
         "streamer_matmul", arg_types, res_types, func_body
diff --git a/benchmarks/tiled_matmul/genbenchmark.py b/benchmarks/tiled_matmul/genbenchmark.py
index 544cfd77..dc40aaf2 100644
--- a/benchmarks/tiled_matmul/genbenchmark.py
+++ b/benchmarks/tiled_matmul/genbenchmark.py
@@ -47,9 +47,9 @@ def get_2d_memref_type(typ, dim_one, dim_two, transpose=False):
 
     b = Block(arg_types=(input_types))
     with ImplicitBuilder(b) as (arg0, arg1, arg2):
-        c0 = arith.Constant.from_int_and_width(0, 32)
+        c0 = arith.ConstantOp.from_int_and_width(0, 32)
         linalg.QuantizedMatmulOp([arg0, arg1, c0.result, c0.result], [arg2])
-        func.Return()
+        func.ReturnOp()
 
     region = Region(b)
 

From dfba4836b8d3cf2a3fdd3f8772c2faa4cdbc9649 Mon Sep 17 00:00:00 2001
From: Joren Dumoulin
Date: Thu, 5 Dec 2024 15:07:04 +0100
Subject: [PATCH 2/3] different nb_cores

---
 benchmarks/dense_matmul/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/dense_matmul/Makefile b/benchmarks/dense_matmul/Makefile
index ada940b3..e38d7799 100644
--- a/benchmarks/dense_matmul/Makefile
+++ b/benchmarks/dense_matmul/Makefile
@@ -20,7 +20,7 @@ else
 REMOVE_MEMREF_COPY=
 endif
 
-SNAXOPTFLAGS = -p convert-linalg-to-kernel,insert-accfg-op{accelerator=snax_gemmx},dispatch-kernels,convert-linalg-to-stream,fuse-streaming-regions,stream-bufferize,snax-bufferize,alloc-to-global,set-memory-space,set-memory-layout{gemm_layout=${LAYOUT}},realize-memref-casts,${REMOVE_MEMREF_COPY}insert-sync-barrier,dispatch-regions{nb_cores=3},convert-stream-to-snax-stream,convert-linalg-to-accfg,test-add-mcycle-around-launch,convert-accfg-to-csr,snax-copy-to-dma,memref-to-snax,snax-to-func,snax-lower-mcycle,clear-memory-space
+SNAXOPTFLAGS = -p convert-linalg-to-kernel,insert-accfg-op{accelerator=snax_gemmx},dispatch-kernels,convert-linalg-to-stream,fuse-streaming-regions,stream-bufferize,snax-bufferize,alloc-to-global,set-memory-space,set-memory-layout{gemm_layout=${LAYOUT}},realize-memref-casts,${REMOVE_MEMREF_COPY}insert-sync-barrier,dispatch-regions{nb_cores=2},convert-stream-to-snax-stream,convert-linalg-to-accfg,test-add-mcycle-around-launch,convert-accfg-to-csr,snax-copy-to-dma,memref-to-snax,snax-to-func,snax-lower-mcycle,clear-memory-space
 
 GEN_DATA_OPTS += --m=${SIZE_M}
 GEN_DATA_OPTS += --n=${SIZE_N}

From 415477983de3233ede18516f93751fb205d39228 Mon Sep 17 00:00:00 2001
From: Joren Dumoulin
Date: Thu, 5 Dec 2024 15:52:15 +0100
Subject: [PATCH 3/3] remove too large sizes

---
 benchmarks/dense_matmul/genbenchmark.py | 58 -------------------------
 1 file changed, 58 deletions(-)

diff --git a/benchmarks/dense_matmul/genbenchmark.py b/benchmarks/dense_matmul/genbenchmark.py
index e6b6a4df..47e5f676 100644
--- a/benchmarks/dense_matmul/genbenchmark.py
+++ b/benchmarks/dense_matmul/genbenchmark.py
@@ -147,64 +147,6 @@ def output_log_benchmark(
 
     sizes = list(itertools.product(selected_dims, repeat=3))
 
-    # some other relevant neural network sizes:
-    nn_size = [
-        # m, n, k
-        # tiled small matrix sizes from LSTM
-        [16, 32, 512],
-        # tiled small matrix sizes from MobileNetV2
-        [448, 32, 32],
-        [8, 192, 32],
-        [8, 16, 16],
-        [224, 16, 192],
-        [8, 96, 16],
-        [64, 24, 96],
-        [8, 48, 24],
-        [56, 48, 16],
-        [8, 32, 144],
-        [56, 32, 32],
-        [200, 48, 16],
-        [200, 32, 64],
-        [200, 96, 16],
-        [200, 8, 384],
-        [200, 8, 96],
-        [56, 576, 16],
-        [8, 160, 576],
-        [56, 48, 160],
-        [8, 960, 16],
-        [56, 64, 960],
-        [56, 64, 320],
-        [8, 40, 1280],
-        # tiled small matrix sizes from ResNet18
-        [8, 32, 152],
-        [8, 64, 576],
-        [8, 128, 576],
-        [112, 128, 128],
-        [56, 32, 64],
-        [40, 64, 1152],
-        [200, 64, 192],
-        [200, 32, 128],
-        [56, 8, 576],
-        [56, 8, 512],
-        [56, 128, 256],
-        [8, 200, 512],
-        # tiled small matrix sizes from Vision-Transformer
-        [40, 96, 768],
-        [40, 200, 64],
-        [200, 64, 200],
-        [40, 8, 768],
-        [8, 128, 192],
-        [8, 40, 768],
-        # tiled small matrix sizes from I-BERTBase
-        [32, 64, 768],
-        [8, 512, 64],
-        [32, 64, 512],
-        [128, 8, 768],
-        [128, 8, 792],
-        [128, 88, 192],
-    ]
-    sizes += nn_size
-
     output_report: dict[str, dict] = {}
 
     for size, layout, add_c in itertools.product(