From 64c300bced8e8a00278955bae84f5e2d71e199f3 Mon Sep 17 00:00:00 2001 From: CST18057 <1693590431@qq.com> Date: Thu, 31 Oct 2024 20:22:36 +0800 Subject: [PATCH] test std::generator(co_yield elements_of)/std::generator(for generator)/oeo::generator(no coroutine stack) performance at different depths --- CMakeLists.txt | 19 +- generator_recursive/generator.hpp | 131 +++++++++++ generator_recursive/main.cpp | 349 ++++++++++++++++++++++++++++++ generator_recursive/test.md | 31 +++ 4 files changed, 528 insertions(+), 2 deletions(-) create mode 100644 generator_recursive/generator.hpp create mode 100644 generator_recursive/main.cpp create mode 100644 generator_recursive/test.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 740c9b6..ccb638f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ if (NOT EXISTS ${CMAKE_BINARY_DIR}/CMakeCache.txt) endif() set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -g -O3 -ffast-math -Wall -march=native -fno-omit-frame-pointer") + "${CMAKE_CXX_FLAGS} -g -O3 -ffast-math -Wall -march=native ") CPMAddPackage( NAME benchmark @@ -37,4 +37,19 @@ target_compile_features(bm_hamming_weight32 PRIVATE cxx_std_20) add_executable(bm_histgram "histgram/histgram.cc") target_link_libraries(bm_histgram benchmark) -target_compile_features(bm_histgram PRIVATE cxx_std_20) \ No newline at end of file +target_compile_features(bm_histgram PRIVATE cxx_std_20) + + +add_executable(bm_gen_recursive_short "generator_recursive/main.cpp") +add_executable(bm_gen_recursive_middle "generator_recursive/main.cpp") +add_executable(bm_gen_recursive_long "generator_recursive/main.cpp") +target_compile_definitions(bm_gen_recursive_short PRIVATE TEST_SHORT) +target_compile_definitions(bm_gen_recursive_middle PRIVATE TEST_MIDDLE) +target_compile_definitions(bm_gen_recursive_long PRIVATE TEST_LONG) +target_compile_options(bm_gen_recursive_long PRIVATE -fomit-frame-pointer) +target_link_libraries(bm_gen_recursive_short benchmark) 
+target_link_libraries(bm_gen_recursive_middle benchmark) +target_link_libraries(bm_gen_recursive_long benchmark) +target_compile_features(bm_gen_recursive_short PRIVATE cxx_std_23) +target_compile_features(bm_gen_recursive_middle PRIVATE cxx_std_23) +target_compile_features(bm_gen_recursive_long PRIVATE cxx_std_23) \ No newline at end of file diff --git a/generator_recursive/generator.hpp b/generator_recursive/generator.hpp new file mode 100644 index 0000000..1a73c0d --- /dev/null +++ b/generator_recursive/generator.hpp @@ -0,0 +1,131 @@ +// This implementation was contributed by group-chat member @OEOTYAN +#ifndef OEO_GENERATOR_HPP +#define OEO_GENERATOR_HPP +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace oeo { + +template +struct generator { /* coroutine-backed generator that exposes an input-iterator interface through begin()/end() */ + using value = std::remove_cvref_t; + using reference = T&&; + using yielded = reference; + + + struct promise_type { /* promise: keeps a pointer to the most recently yielded value and any exception that escaped the coroutine body */ + std::add_pointer_t ptr; /* written by yield_value, read when an iterator is dereferenced */ + std::exception_ptr exception; + + generator get_return_object() noexcept { return generator{*this}; } + + std::suspend_always initial_suspend() noexcept { return {}; } /* the coroutine starts suspended; begin() performs the first resume */ + + std::suspend_always final_suspend() noexcept { return {}; } /* stays suspended after finishing so done() can be queried; the generator destructor destroys the frame */ + + void unhandled_exception() noexcept { exception = std::current_exception(); } /* captured here and handed to the consumer by rethrow() */ + + void rethrow() { + if (exception) { + std::rethrow_exception(exception); + } + } + std::suspend_always yield_value(yielded val) noexcept { /* publishes the address of the yielded object without copying it */ + ptr = std::addressof(val); + return {}; + } + auto yield_value(std::remove_reference_t const& lval) + requires std::is_rvalue_reference_v && + std::constructible_from, std::remove_reference_t const&> + { /* copying overload: the value is copied into yield_copied::storage and the address of that copy is published on suspension */ + struct yield_copied { + std::remove_cvref_t storage; + constexpr yield_copied(std::remove_reference_t const& v) : storage(v) {} + constexpr bool await_ready() const noexcept { return false; } + constexpr void await_suspend(std::coroutine_handle h) noexcept { + h.promise().ptr = std::addressof(storage); + } + constexpr void await_resume() const noexcept {} + }; + return yield_copied{lval}; + } + void return_void() noexcept {} 
+ + template + U&& await_transform(U&&) = delete; /* deleted, so co_await is rejected inside the generator body */ + }; + + struct iterator { /* input iterator over the yielded values; a default-constructed iterator (null handle) is the end sentinel */ + using iterator_category = std::input_iterator_tag; + using difference_type = ptrdiff_t; + using value_type = generator::value; + using reference = generator::reference; + using pointer = std::add_pointer_t; + + std::coroutine_handle handle = nullptr; + + iterator() = default; + explicit iterator(std::coroutine_handle handle) noexcept : handle(handle) {} + + iterator& operator++() { /* resumes the coroutine; once it has finished, the handle is reset to nullptr (so this compares equal to end()) and any stored exception is rethrown */ + handle.resume(); + if (handle.done()) { + std::exchange(handle, nullptr).promise().rethrow(); + } + + return *this; + } + + void operator++(int) { ++*this; } + + [[nodiscard]] bool operator==(iterator const& other) const noexcept { return handle == other.handle; } + + [[nodiscard]] bool operator!=(iterator const& other) const noexcept { return !(*this == other); } + + [[nodiscard]] reference operator*() const noexcept { return static_cast(*handle.promise().ptr); } + + [[nodiscard]] pointer operator->() const noexcept { return handle.promise().ptr; } + }; + [[nodiscard]] iterator begin() { /* runs the coroutine up to its first co_yield; if it finished without yielding, rethrows any stored exception and otherwise returns the end iterator */ + if (handle) { + handle.resume(); + if (handle.done()) { + handle.promise().rethrow(); + return {}; + } + } + return iterator{handle}; + } + + [[nodiscard]] iterator end() noexcept { return {}; } + + constexpr generator(generator&& other) noexcept : handle(std::exchange(other.handle, nullptr)) {} /* takes over the handle and leaves other empty */ + + constexpr ~generator() { /* destroys the coroutine frame if one is still owned */ + if (handle) { + handle.destroy(); + } + } + constexpr generator() = default; + + generator& operator=(generator&& other) noexcept { /* swap-based move assignment: the previously held handle ends up in other */ + std::swap(other.handle, handle); + return *this; + } + +private: + constexpr explicit generator(promise_type& promise) noexcept + : handle(std::coroutine_handle::from_promise(promise)) {} + + std::coroutine_handle handle = nullptr; +}; +} // namespace oeo + +#endif /* OEO_GENERATOR_HPP */ diff --git a/generator_recursive/main.cpp b/generator_recursive/main.cpp new file mode 100644 index 0000000..734bb3a --- /dev/null +++ b/generator_recursive/main.cpp @@ -0,0 +1,349 @@ 
+#include "benchmark/benchmark.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "generator.hpp" + +#ifdef TEST_SHORT /* the benchmark input sv is chosen per build target through a compile definition */ +std::string_view sv{"test"}; +#elif defined (TEST_MIDDLE) /* must match the TEST_MIDDLE definition that CMakeLists.txt sets on bm_gen_recursive_middle */ +std::string_view sv{"test-test-test-test"}; +#elif defined (TEST_LONG) +std::string_view sv{"test-test-test-test-test-test-test-test-test-test-test-test"}; +#else +std::string_view sv{"test"}; +#endif + +template +std::generator gen1(std::string_view v) { /* while N > 0 delegates to the nested gen1 through std::ranges::elements_of; at N == 0 yields each character of v */ + if constexpr (N > 0) { + co_yield std::ranges::elements_of(gen1(v)); + } else { + for (auto c : v) { + co_yield c; + } + } +} +template +std::generator gen2(std::string_view v) { /* while N > 0 re-yields every element of the nested gen2 with a for loop; at N == 0 yields each character of v */ + if constexpr (N > 0) { + for (auto c : gen2(v)) { + co_yield c; + } + } else { + for (auto c : v) { + co_yield c; + } + } +} +template +oeo::generator gen3(std::string_view v) { /* same shape as gen2 but built on oeo::generator */ + if constexpr (N > 0) { + for (auto c : gen3(v)) { + co_yield c; + } + } else { + for (auto c : v) { + co_yield c; + } + } +} + +template +std::generator f_gen_elements_of(std::string_view v) { /* outermost level of the elements_of chain; only valid for N > 0 */ + static_assert(N > 0); + co_yield std::ranges::elements_of(gen1(v)); +} + +template +std::generator f_gen_for_gen(std::string_view v) { /* outermost level of the for-loop chain over std::generator; only valid for N > 0 */ + static_assert(N > 0); + for (auto x : gen2(v)) { + co_yield x; + } +} + +template +oeo::generator f_oeo_gen_for_gen(std::string_view v) { /* outermost level of the for-loop chain over oeo::generator; only valid for N > 0 */ + static_assert(N > 0); + for (auto x : gen3(v)) { + co_yield x; + } +} + +template +static void GenGen(benchmark::State& state) { /* benchmark: N == 0 iterates sv directly, N == 1 iterates gen1<0>(sv), any other N iterates f_gen_elements_of(sv) */ + for (auto _ : state) { + int sum = 0; + benchmark::DoNotOptimize(sv); + benchmark::DoNotOptimize(sum); + if constexpr (N == 0) { + for (auto x : sv) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else if constexpr (N == 1) { + for (auto x : gen1<0>(sv)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else { + for (auto x : f_gen_elements_of(sv)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } + } +} +// Register the function as a benchmark +BENCHMARK_TEMPLATE(GenGen, 0); +BENCHMARK_TEMPLATE(GenGen, 1); 
+BENCHMARK_TEMPLATE(GenGen, 2); +BENCHMARK_TEMPLATE(GenGen, 3); +BENCHMARK_TEMPLATE(GenGen, 5); +BENCHMARK_TEMPLATE(GenGen, 10); +BENCHMARK_TEMPLATE(GenGen, 20); +BENCHMARK_TEMPLATE(GenGen, 50); +BENCHMARK_TEMPLATE(GenGen, 100); + +template +static void GenFor(benchmark::State& state) { /* benchmark: N == 0 iterates sv directly, N == 1 iterates gen2<0>(sv), any other N iterates f_gen_for_gen(sv) */ + for (auto _ : state) { + int sum = 0; + benchmark::DoNotOptimize(sv); + benchmark::DoNotOptimize(sum); + if constexpr (N == 0) { + for (auto x : sv) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else if constexpr (N == 1) { + for (auto x : gen2<0>(sv)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else { + for (auto x : f_gen_for_gen(sv)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } + } +} +BENCHMARK_TEMPLATE(GenFor, 0); +BENCHMARK_TEMPLATE(GenFor, 1); +BENCHMARK_TEMPLATE(GenFor, 2); +BENCHMARK_TEMPLATE(GenFor, 3); +BENCHMARK_TEMPLATE(GenFor, 5); +BENCHMARK_TEMPLATE(GenFor, 10); +BENCHMARK_TEMPLATE(GenFor, 20); +BENCHMARK_TEMPLATE(GenFor, 50); +BENCHMARK_TEMPLATE(GenFor, 100); + +template +static void OEOGenFor(benchmark::State& state) { /* benchmark: N == 0 iterates sv directly, N == 1 iterates gen3<0>(sv), any other N iterates f_oeo_gen_for_gen(sv) */ + for (auto _ : state) { + int sum = 0; + benchmark::DoNotOptimize(sv); + benchmark::DoNotOptimize(sum); + if constexpr (N == 0) { + for (auto x : sv) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else if constexpr (N == 1) { + for (auto x : gen3<0>(sv)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else { + for (auto x : f_oeo_gen_for_gen(sv)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } + } +} +BENCHMARK_TEMPLATE(OEOGenFor, 0); +BENCHMARK_TEMPLATE(OEOGenFor, 1); +BENCHMARK_TEMPLATE(OEOGenFor, 2); +BENCHMARK_TEMPLATE(OEOGenFor, 3); +BENCHMARK_TEMPLATE(OEOGenFor, 5); +BENCHMARK_TEMPLATE(OEOGenFor, 10); +BENCHMARK_TEMPLATE(OEOGenFor, 20); +BENCHMARK_TEMPLATE(OEOGenFor, 50); +BENCHMARK_TEMPLATE(OEOGenFor, 100); + +static void TrivialGenFor(benchmark::State& state) { /* benchmark: sums the characters of sv with a plain range-for; no generator is involved */ + for (auto _ : state) { + int sum = 0; + benchmark::DoNotOptimize(sum); + for (auto x : sv) { + sum += x; + benchmark::DoNotOptimize(x); + } 
+ benchmark::DoNotOptimize(sv); + } +} +BENCHMARK(TrivialGenFor); + +std::generator gen1(std::string_view v, size_t N) { /* runtime-depth variant: while N > 0 delegates to gen1(v, N - 1) through std::ranges::elements_of, otherwise yields each character of v */ + if (N > 0) { + co_yield std::ranges::elements_of(gen1(v, N - 1)); + } else { + for (auto c : v) { + co_yield c; + } + } +} +std::generator gen2(std::string_view v, size_t N) { /* runtime-depth variant: while N > 0 re-yields every element of gen2(v, N - 1), otherwise yields each character of v */ + if (N > 0) { + for (auto c : gen2(v, N - 1)) { + co_yield c; + } + } else { + for (auto c : v) { + co_yield c; + } + } +} +oeo::generator gen3(std::string_view v, size_t N) { /* same shape as the runtime-depth gen2 but built on oeo::generator */ + if (N > 0) { + for (auto c : gen3(v, N - 1)) { + co_yield c; + } + } else { + for (auto c : v) { + co_yield c; + } + } +} + +std::generator f_gen_elements_of(std::string_view v, size_t N) { /* outermost level: forwards everything produced by gen1(v, N - 1). NOTE(review): N - 1 wraps around for N == 0; the benchmark in this file only passes N >= 1 */ + co_yield std::ranges::elements_of(gen1(v, N - 1)); +} + +std::generator f_gen_for_gen(std::string_view v, size_t N) { /* outermost level: re-yields everything produced by gen2(v, N - 1). NOTE(review): N - 1 wraps around for N == 0; the benchmark in this file only passes N >= 1 */ + for (auto x : gen2(v, N - 1)) { + co_yield x; + } +} + +oeo::generator f_oeo_gen_for_gen(std::string_view v, size_t N) { /* outermost level: re-yields everything produced by gen3(v, N - 1). NOTE(review): N - 1 wraps around for N == 0; the benchmark in this file only passes N >= 1 */ + for (auto x : gen3(v, N - 1)) { + co_yield x; + } +} + +static void DynGenGen(benchmark::State& state) { /* benchmark: nesting depth N is read from state.range(0); 0 iterates sv directly, 1 iterates gen1(sv, 0), anything else iterates f_gen_elements_of(sv, N - 1) */ + size_t N = state.range(0); + for (auto _ : state) { + int sum = 0; + benchmark::DoNotOptimize(sv); + benchmark::DoNotOptimize(sum); + if (N == 0) { + for (auto x : sv) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else if (N == 1) { + for (auto x : gen1(sv, 0)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else { + for (auto x : f_gen_elements_of(sv, N - 1)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } + } +} +// Register the function as a benchmark +BENCHMARK(DynGenGen)->Arg(0) +->Arg(1) +->Arg(2) +->Arg(3) +->Arg(5) +->Arg(10) +->Arg(20) +->Arg(50) +->Arg(100) +; + +static void DynGenFor(benchmark::State& state) { /* benchmark: nesting depth N is read from state.range(0); 0 iterates sv directly, 1 iterates gen2(sv, 0), anything else iterates f_gen_for_gen(sv, N - 1) */ + size_t N = state.range(0); + for (auto _ : state) { + int sum = 0; + benchmark::DoNotOptimize(sv); + benchmark::DoNotOptimize(sum); + if (N == 0) { + for (auto x : sv) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else if (N == 1) { + for (auto x : gen2(sv, 0)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else { +
for (auto x : f_gen_for_gen(sv, N - 1)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } + } +} +BENCHMARK(DynGenFor)->Arg(0) +->Arg(1) +->Arg(2) +->Arg(3) +->Arg(5) +->Arg(10) +->Arg(20) +->Arg(50) +->Arg(100) +; + +static void DynOEOGenFor(benchmark::State& state) { /* benchmark: nesting depth N is read from state.range(0); 0 iterates sv directly, 1 iterates gen3(sv, 0), anything else iterates f_oeo_gen_for_gen(sv, N - 1) */ + size_t N = state.range(0); + for (auto _ : state) { + int sum = 0; + benchmark::DoNotOptimize(sv); + benchmark::DoNotOptimize(sum); + if (N == 0) { + for (auto x : sv) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else if (N == 1) { + for (auto x : gen3(sv, 0)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } else { + for (auto x : f_oeo_gen_for_gen(sv, N - 1)) { + sum += x; + benchmark::DoNotOptimize(x); + } + } + } +} +BENCHMARK(DynOEOGenFor)->Arg(0) +->Arg(1) +->Arg(2) +->Arg(3) +->Arg(5) +->Arg(10) +->Arg(20) +->Arg(50) +->Arg(100) +; +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/generator_recursive/test.md b/generator_recursive/test.md new file mode 100644 index 0000000..37d1b6d --- /dev/null +++ b/generator_recursive/test.md @@ -0,0 +1,31 @@ +std::string_view sv{"test"} +func short range | 0 | 1 | 2 | 3 | 5 | 10 | 20 | 50 | 100 +-|-|-|-|-|-|-|-|-|- +GenGen| 1.93| 160| 337| 445| 698| 1413| 2787| 10010| 25495 +GenFor| 1.84| 160| 335| 465| 785| 1569| 3530| 17264| 38365 +OEOGenFor| 2.20| 134| 278| 443| 698| 1444| 3139| 12870| 31495 +DynGenGen| 2.29| 153| 298| 459| 753| 1381| 5441| 9208| 20856 +DynGenFor| 1.90| 160| 318| 471| 802| 1611| 5999| 11161| 26681 +DynOEOGenFor| 2.25| 148| 285| 424| 711| 1535| 3209| 11161| 22949 + + +std::string_view sv{"test-test-test-test"} +func middle range | 0 | 1 | 2 | 3 | 5 | 10 | 20 | 50 | 100 +-|-|-|-|-|-|-|-|-|- +GenGen| 1.20| 68.0| 153| 200| 330| 711| 1395| 6278| 14648 +GenFor| 1.00| 69.8| 141| 220| 384| 802| 1883| 8161| 20856 +OEOGenFor| 1.17| 60.0| 128| 201| 330| 698| 1569| 7150| 16497 +DynGenGen| 1.12| 69.8| 135| 199| 322| 645| 3610| 5469| 15346 +DynGenFor| 0.921| 73.2| 153| 225| 385| 854| 4049| 9626| 19043 +DynOEOGenFor| 1.35| 
67.0| 134| 205| 337| 725| 1611| 7150| 14439 + + +std::string_view sv{"test-test-test-test-test-test-test-test-test-test-test-test"} +func long range | 0 | 1 | 2 | 3 | 5 | 10 | 20 | 50 | 100 +-|-|-|-|-|-|-|-|-|- +GenGen| 15.1| 262| 385| 443| 562| 921| 1569| 6138| 14439 +GenFor| 7.32| 251| 547| 942| 1726| 3683| 8894| 36830| 87887 +OEOGenFor| 13.8| 188| 2668| 2860| 3369| 4757| 8894| 32087| 71498 +DynGenGen| 8.02| 267| 361| 430| 600| 921| 3990| 5720| 14125 +DynGenFor| 7.95| 270| 562| 1001| 1726| 4708| 11963| 32087| 64523 +DynOEOGenFor| 9.28| 220| 430| 663| 1147| 2302| 6696| 26681| 51562 \ No newline at end of file