Skip to content

Commit

Permalink
Use PyTorch Benchmarking Timer by default with pytest-benchmark (Ligh…
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanYashchuk authored May 30, 2024
1 parent ebe1326 commit 27158e6
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 45 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ addopts = [
"--doctest-modules",
"--color=yes",
"--disable-pytest-warnings",
"--benchmark-timer=torch.utils.benchmark.utils.timer.timer",
"--benchmark-warmup=on",
]
markers = [
"standalone: mark a test as standalone",
Expand Down
1 change: 1 addition & 0 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
coverage ==7.5.1
pytest ==8.1.1
pytest-benchmark ==4.0.0
pytest-timeout ==2.3.1
pytest-cov ==4.1.0
pytest-xdist ==3.6.1
Expand Down
2 changes: 0 additions & 2 deletions thunder/benchmarks/einsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
grad_executors_ids,
thunder_gradv1,
thunder_torchcompile_gradv1,
wrap_for_benchmark,
)


Expand All @@ -33,7 +32,6 @@ def _instantiate_benchmark_env(

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

return setup, fn

Expand Down
44 changes: 1 addition & 43 deletions thunder/benchmarks/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os
import torch
import thunder
from thunder.core.transforms import grad, clear_grads, populate_grads, get_grad, put_grad, put_grads
from thunder.core.transforms import grad, clear_grads, populate_grads
from thunder.core.interpreter import interpret

from thunder.benchmarks import (
Expand All @@ -28,15 +28,13 @@
torch_executor,
torch_compile_executor,
thunder_executor,
thunder_torch_executor,
thunder_torch_compile_executor,
thunder_apex_executor,
thunder_apex_nvfuser_executor,
thunder_cudnn_executor,
thunder_cudnn_nvfuser_executor,
thunder_cudnn_layer_norm_executor,
thunder_cudnn_layer_norm_nvfuser_executor,
thunder_sdpa_executor,
thunder_sdpa_torch_compile_nvfuser_executor,
BatchNormBenchmark,
)
Expand Down Expand Up @@ -69,16 +67,6 @@ def setup():
return setup


def wrap_for_benchmark(fn):
    """Wrap ``fn`` so that each call synchronizes CUDA before returning.

    The wrapper forwards all positional and keyword arguments to ``fn`` and
    returns its result unchanged. When a CUDA device is usable, it calls
    ``torch.cuda.synchronize()`` after ``fn`` returns, so the caller only
    regains control once the queued GPU work has finished.

    On hosts without a usable CUDA device the synchronization is skipped;
    calling ``torch.cuda.synchronize()`` there would raise instead of
    returning the result.

    Args:
        fn: The callable to wrap.

    Returns:
        A callable with the same metadata as ``fn`` (via ``functools.wraps``).
    """
    # Probe once at wrap time so the per-call path adds no extra work
    # beyond the synchronization itself.
    needs_sync = torch.cuda.is_available()

    @wraps(fn)
    def fn_(*args, **kwargs):
        result = fn(*args, **kwargs)
        if needs_sync:
            torch.cuda.synchronize()
        return result

    return fn_


def torch_fwd(b: Benchmark):
module = b.fn()
fn_ = torch_executor(module)
Expand Down Expand Up @@ -349,7 +337,6 @@ def test_nanogpt_gelu_fwd(benchmark, executor: Callable):

setup = make_setup(gelu_bench)
fn = executor(gelu_bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -366,7 +353,6 @@ def test_nanogpt_gelu_grad(benchmark, executor: Callable):

setup = make_setup(gelu_bench)
fn = executor(gelu_bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -383,7 +369,6 @@ def test_batch_norm_fwd(benchmark, executor: Callable):

setup = make_setup(bn_bench)
fn = executor(bn_bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -404,7 +389,6 @@ def test_batch_norm_grad(benchmark, executor: Callable):

setup = make_setup(bn_bench)
fn = executor(bn_bench)
fn = wrap_for_benchmark(fn)
benchmark.pedantic(fn, setup=setup, rounds=200, warmup_rounds=20)


Expand All @@ -426,7 +410,6 @@ def test_nanogpt_cross_entropy_fwd(benchmark, executor: None | Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -449,7 +432,6 @@ def test_nanogpt_cross_entropy_grad(benchmark, executor: None | Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -472,7 +454,6 @@ def test_nanogpt_layer_norm_fwd(benchmark, executor: None | Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -490,7 +471,6 @@ def test_nanogpt_sdpa_fwd(benchmark, executor: None | Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -508,7 +488,6 @@ def test_nanogpt_sdpa_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -525,7 +504,6 @@ def test_llama2_7b_sdpa_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand Down Expand Up @@ -574,7 +552,6 @@ def test_litgpt_sdpa_grad(benchmark, executor: Callable, bs, config):

setup = make_setup(bench)
fn = thunder_fwd_bwd(bench, compile_fn=executor)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -591,7 +568,6 @@ def test_nanogpt_mlp_fwd(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -608,7 +584,6 @@ def test_nanogpt_mlp_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -629,7 +604,6 @@ def test_nanogpt_csa_fwd(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -650,7 +624,6 @@ def test_nanogpt_csa_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -668,7 +641,6 @@ def test_nanogpt_block_fwd(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -686,7 +658,6 @@ def test_nanogpt_block_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -708,7 +679,6 @@ def test_nanogpt_gpt2_fwd(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=5, warmup_rounds=1)

Expand All @@ -730,7 +700,6 @@ def test_nanogpt_gpt2_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=5, warmup_rounds=1)

Expand All @@ -751,7 +720,6 @@ def test_nanogpt_gpt2xl_fwd(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=5, warmup_rounds=1)

Expand All @@ -773,7 +741,6 @@ def test_nanogpt_gpt2xl_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=5, warmup_rounds=1)

Expand All @@ -792,7 +759,6 @@ def test_open_llama_7b_fwd(benchmark, executor: Callable):

setup = make_setup(b)
fn = executor(b)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=5, warmup_rounds=1)

Expand All @@ -806,7 +772,6 @@ def test_llama_2_7b_hf_fwd(benchmark, executor: Callable):

setup = make_setup(b)
fn = executor(b)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=5, warmup_rounds=1)

Expand All @@ -828,7 +793,6 @@ def test_llama_2_7b_grad(benchmark, executor: Callable):

setup = make_setup(b)
fn = executor(b)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=20, warmup_rounds=1)

Expand All @@ -845,7 +809,6 @@ def test_llama2_mlp_7b_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -862,7 +825,6 @@ def test_llama2_causal_self_attention_7b_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand All @@ -879,7 +841,6 @@ def test_llama2_7b_rmsnorm_grad(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand Down Expand Up @@ -968,7 +929,6 @@ def test_litgpt_qkv_split_rope_train_forward(benchmark, executor: Callable, use_

setup = make_setup(bench)
fn = executor(bench.fn())
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=40, warmup_rounds=1)

Expand Down Expand Up @@ -1032,7 +992,6 @@ def test_litgpt_qkv_split_rope_train_backward(benchmark, executor: Callable, use

fw_setup = make_setup(bench)
fn, bw_setup = backward_only(bench.fn(), executor, fw_setup)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=bw_setup, rounds=40, warmup_rounds=1)

Expand All @@ -1054,6 +1013,5 @@ def test_interpreter_nanogpt_gpt2_fwd(benchmark, executor: Callable):

setup = make_setup(bench)
fn = executor(bench)
fn = wrap_for_benchmark(fn)

benchmark.pedantic(fn, setup=setup, rounds=5, warmup_rounds=1)

0 comments on commit 27158e6

Please sign in to comment.