From fcfc545b09e3313963b51e6368ae67f2c57f5daf Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Tue, 31 Dec 2019 03:45:35 -0800 Subject: [PATCH 1/8] conf.py update + MT fix for storage --- docs/conf.py | 4 +--- source/timemory/utility/impl/storage_true.hpp | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 0516fba47..c65d6e59e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,7 @@ def install(package): # -- Project information ----------------------------------------------------- -project = 'TiMemory' +project = 'timemory' copyright = '2019, Jonathan R. Madsen' author = 'Jonathan R. Madsen' @@ -41,8 +41,6 @@ def install(package): if not os.path.exists(_bindir): os.makedirs(_bindir) - -if not os.path.exists(_doxdir): os.chdir(_bindir) sp.run(["cmake", "-DTIMEMORY_BUILD_DOCS=ON", "-DENABLE_DOXYGEN_HTML_DOCS=ON", diff --git a/source/timemory/utility/impl/storage_true.hpp b/source/timemory/utility/impl/storage_true.hpp index 64e43743f..047f77b68 100644 --- a/source/timemory/utility/impl/storage_true.hpp +++ b/source/timemory/utility/impl/storage_true.hpp @@ -837,6 +837,7 @@ void storage::merge(this_type* itr) { using pre_order_iterator = typename graph_t::pre_order_iterator; + using sibling_iterator = typename graph_t::sibling_iterator; // don't merge self if(itr == this) @@ -893,11 +894,20 @@ storage::merge(this_type* itr) if(graph().is_valid(_nitr.begin()) && _nitr.begin()) { if(settings::debug() || settings::verbose() > 2) - PRINT_HERE("[%s]> worker is merging", Type::label().c_str()); + PRINT_HERE("[%s]> worker is merging %i records into %i records", + Type::label().c_str(), (int) itr->size(), + (int) this->size()); pre_order_iterator _pos = _titr; - pre_order_iterator _other = _nitr.begin(); - graph().append_child(_pos, _other); + sibling_iterator _other = _nitr; + for(auto sitr = _other.begin(); sitr != _other.end(); ++sitr) + { + pre_order_iterator pitr = sitr; + graph().append_child(_pos, pitr); + } _merged = true; + if(settings::debug() || settings::verbose() > 2) + PRINT_HERE("[%s]> master has %i records", Type::label().c_str(), + (int) this->size()); break; } From ff132f1c96c747b215829bc57bfedfde730681c8 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 1 Jan 2020 15:57:00 -0800 Subject: [PATCH 2/8] mpl::sort + integral_constant for {start,stop}_priority - apply::out_of_order - removed unnecessary m_print_{laps,prefix} from component_{tuple,list} - removed unnecessary m_key from component_{tuple,list} - fix to echo_measurement units --- .travis.yml | 39 ++-- pyctest-runner.py | 84 ++++---- source/tests/external/gotcha_tests_lib.cpp | 25 +++ source/tests/external/gotcha_tests_lib.hpp | 3 + source/tests/gotcha_tests.cpp | 78 ++++++-- source/tests/priority_tests.cpp | 73 +++---- source/timemory/components/base.hpp | 13 +- source/timemory/mpl/apply.hpp | 100 ++++++++++ source/timemory/mpl/bits/operations.hpp | 2 +- source/timemory/mpl/filters.hpp | 159 +++++++++++++-- source/timemory/mpl/operations.hpp | 94 ++++++--- source/timemory/mpl/type_traits.hpp | 23 ++- source/timemory/mpl/types.hpp | 6 + source/timemory/plotting.hpp | 4 +- source/timemory/variadic/auto_hybrid.hpp | 11 +- source/timemory/variadic/auto_list.hpp | 3 +- source/timemory/variadic/auto_tuple.hpp | 3 +- .../timemory/variadic/bits/component_list.hpp | 138 ++++++------- .../variadic/bits/component_tuple.hpp | 112 +++++------ source/timemory/variadic/component_hybrid.hpp | 34 ++-- source/timemory/variadic/component_list.hpp | 182 ++++++++++-------- source/timemory/variadic/component_tuple.hpp | 161 +++++++++------- source/tools/timem.cpp | 13 +- 23 files changed, 846 insertions(+), 514 deletions(-) diff --git a/.travis.yml b/.travis.yml index 312422768..e6cf59f37 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,21 +28,20 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which gcc-6) && CXX=$(which g++-6) && BUILD_TYPE=MinSizeRel && BUILD_ARGS='--build-libs static'" + - MATRIX_EVAL="CC=$(which gcc-6) && CXX=$(which g++-6) && BUILD_TYPE=MinSizeRel && BUILD_ARGS='--papi --mpi --build-libs static'" # GCC 7 - os: linux - dist: xenial + dist: bionic python: "3.6" addons: apt: sources: - ubuntu-toolchain-r-test packages: - - gcc-7 - - g++-7 + - gcc + - g++ - build-essential - libmpich-dev - mpich @@ -53,7 +52,7 @@ matrix: - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which gcc-7) && CXX=$(which g++-7) && BUILD_ARGS='--build-libs static --caliper'" + - MATRIX_EVAL="CC=$(which gcc) && CXX=$(which g++) && COVERAGE=1 && BUILD_TYPE=Debug && BUILD_ARGS='--build-libs static --mpi --papi --caliper --coverage'" # # GCC 8 - os: linux @@ -73,10 +72,9 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which gcc-8) && CXX=$(which g++-8) && BUILD_ARGS='--build-libs static'" + - MATRIX_EVAL="CC=$(which gcc-8) && CXX=$(which g++-8) && BUILD_ARGS='--build-libs static --mpi --papi'" # Clang 5.0 - os: linux dist: xenial @@ -96,10 +94,9 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which clang-5.0) && CXX=$(which clang++-5.0) && BUILD_ARGS='--build-libs static'" + - MATRIX_EVAL="CC=$(which clang-5.0) && CXX=$(which clang++-5.0) && BUILD_ARGS='--build-libs static --mpi --papi'" # Clang 7 - os: linux dist: bionic @@ -116,10 +113,9 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which clang-7) && CXX=$(which clang++-7) && BUILD_ARGS='--build-libs static --caliper'" + - MATRIX_EVAL="CC=$(which clang-7) && CXX=$(which clang++-7) && BUILD_ARGS='--build-libs static --caliper --mpi --papi'" # ------------------------------------------------------------------------ # # # Python 3.7 @@ -143,10 +139,9 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which gcc-6) && CXX=$(which g++-6) && BUILD_TYPE=MinSizeRel && BUILD_ARGS='--build-libs shared --gotcha'" + - MATRIX_EVAL="CC=$(which gcc-6) && CXX=$(which g++-6) && BUILD_TYPE=MinSizeRel && BUILD_ARGS='--build-libs shared --mpi --papi --gotcha'" # # GCC 8 - os: linux @@ -166,10 +161,9 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which gcc-8) && CXX=$(which g++-8) && BUILD_ARGS='--build-libs shared'" + - MATRIX_EVAL="CC=$(which gcc-8) && CXX=$(which g++-8) && BUILD_ARGS='--build-libs shared --mpi --papi'" # Clang 5.0 - os: linux dist: xenial @@ -189,10 +183,9 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which clang-5.0) && CXX=$(which clang++-5.0) && COVERAGE=1 && BUILD_ARGS='--build-libs shared'" + - MATRIX_EVAL="CC=$(which clang-5.0) && CXX=$(which clang++-5.0) && BUILD_ARGS='--build-libs shared --mpi --papi'" # Clang 7 - os: linux dist: bionic @@ -209,10 +202,9 @@ matrix: - papi-tools - libgoogle-perftools-dev - google-perftools - - lcov - graphviz env: - - MATRIX_EVAL="CC=$(which clang-7) && CXX=$(which clang++-7) && BUILD_ARGS='--build-libs shared --gotcha --caliper'" + - MATRIX_EVAL="CC=$(which clang-7) && CXX=$(which clang++-7) && BUILD_ARGS='--build-libs shared --gotcha --caliper --mpi --papi'" # ------------------------------------------------------------------------ # @@ -220,8 +212,6 @@ before_install: - eval "${MATRIX_EVAL}" - export CC=${CC} - export CXX=${CXX} - - export TIMEMORY_VERBOSE=4 - - export TIMEMORY_OUTPUT_TOTAL=1 - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - bash miniconda.sh -b -p ${HOME}/miniconda - export PATH="${HOME}/miniconda/bin:${PATH}" @@ -232,10 +222,8 @@ install: - env - conda create -c defaults -c conda-forge -n pyctest python=${TRAVIS_PYTHON_VERSION} pyctest scikit-build cmake numpy matplotlib pillow gperftools - source activate pyctest - # - python setup.py install --build-type=Debug -- -DTIMEMORY_BUILD_C=OFF -DTIMEMORY_BUILD_TOOLS=OFF -DBUILD_STATIC_LIBS=OFF -DTIMEMORY_USE_CALIPER=ON -DTIMEMORY_USE_GOTCHA=ON -- -j2 - - export ARGS="-SF --pyctest-model=Continuous --pyctest-site=Travis --no-mpi ${BUILD_ARGS}" + - export ARGS="-SF --pyctest-model=Continuous --pyctest-site=Travis --python ${BUILD_ARGS}" - if [ -z "${BUILD_TYPE}" ]; then BUILD_TYPE=RelWithDebInfo ; fi - - if [ -z "${USE_MPI}" ]; then ARGS="${ARGS} --no-mpi"; fi - if [ -n "${COVERAGE}" ]; then ARGS="${ARGS} --coverage"; else ARGS="${ARGS} --profile=cpu"; fi - travis_wait 75 python ./pyctest-runner.py ${ARGS} --pyctest-build-type=${BUILD_TYPE} -- -VV @@ -248,6 +236,7 @@ after_success: - eval "${MATRIX_EVAL}" - SUBMIT_COVERAGE=0 - if [ -n "${COVERAGE}" ]; then SUBMIT_COVERAGE=1; fi + - if [ -z "$(which lcov)" ]; then SUBMIT_COVERAGE=0; fi # Create lcov report: capture coverage info, filter, debugging, and upload - if [ "${SUBMIT_COVERAGE}" -gt 0 ]; then lcov --directory . --capture --output-file coverage.info ; fi - if [ "${SUBMIT_COVERAGE}" -gt 0 ]; then lcov --remove coverage.info '/usr/*' "${HOME}"'/.cache/*' '*/external/*' --output-file coverage.info; fi diff --git a/pyctest-runner.py b/pyctest-runner.py index 0c1568df5..292f0e6e1 100755 --- a/pyctest-runner.py +++ b/pyctest-runner.py @@ -72,15 +72,13 @@ def configure(): default=False, action='store_true') parser.add_argument("--caliper", help="TIMEMORY_USE_CALIPER=ON", default=False, action='store_true') - parser.add_argument("--no-papi", help="TIMEMORY_USE_PAPI=OFF", + parser.add_argument("--likwid", help="TIMEMORY_USE_LIKWID=ON", default=False, action='store_true') - parser.add_argument("--no-mpi", help="TIMEMORY_USE_MPI=OFF", + parser.add_argument("--papi", help="TIMEMORY_USE_PAPI=ON", default=False, action='store_true') - parser.add_argument("--no-python", help="TIMEMORY_BUILD_PYTHON=OFF", + parser.add_argument("--mpi", help="TIMEMORY_USE_MPI=ON", default=False, action='store_true') - parser.add_argument("--no-c", help="TIMEMORY_BUILD_C=OFF", - default=False, action='store_true') - parser.add_argument("--no-gtest", help="TIMEMORY_BUILD_GTEST=OFF", + parser.add_argument("--python", help="TIMEMORY_BUILD_PYTHON=ON", default=False, action='store_true') parser.add_argument("--extra-optimizations", help="TIMEMORY_BUILD_EXTRA_OPTIMIZATIONS=ON", @@ -114,7 +112,7 @@ def configure(): pyct.BINARY_DIRECTORY, "CMakeCache.txt")) if platform.system() != "Linux": - args.no_papi = True + args.papi = False os.environ["PYCTEST_TESTING"] = "ON" @@ -158,40 +156,36 @@ def run_pyctest(): build_opts = { "BUILD_SHARED_LIBS": "ON" if "shared" in args.build_libs else "OFF", "BUILD_STATIC_LIBS": "ON" if "static" in args.build_libs else "OFF", - "TIMEMORY_BUILD_C": "OFF" if args.no_c else "ON", - "TIMEMORY_BUILD_GTEST": "OFF" if args.no_gtest else "ON", "TIMEMORY_BUILD_TOOLS": "ON" if args.tools else "OFF", - "TIMEMORY_BUILD_PYTHON": "OFF" if args.no_python else "ON", "TIMEMORY_BUILD_GOTCHA": "ON" if args.gotcha else "OFF", + "TIMEMORY_BUILD_PYTHON": "ON" if args.python else "OFF", "TIMEMORY_BUILD_CALIPER": "ON" if args.caliper else "OFF", "TIMEMORY_BUILD_TESTING": "ON", "TIMEMORY_BUILD_EXTRA_OPTIMIZATIONS": "ON" if args.extra_optimizations else "OFF", - "TIMEMORY_USE_MPI": "OFF" if args.no_mpi else "ON", + "TIMEMORY_USE_MPI": "ON" if args.mpi else "OFF", "TIMEMORY_USE_TAU": "ON" if args.tau else "OFF", "TIMEMORY_USE_ARCH": "ON" if args.arch else "OFF", - "TIMEMORY_USE_PAPI": "OFF" if args.no_papi else "ON", + "TIMEMORY_USE_PAPI": "ON" if args.papi else "OFF", "TIMEMORY_USE_CUDA": "ON" if args.cuda else "OFF", "TIMEMORY_USE_CUPTI": "ON" if args.cupti else "OFF", "TIMEMORY_USE_GPERF": "OFF", "TIMEMORY_USE_UPCXX": "ON" if args.upcxx else "OFF", - "TIMEMORY_USE_PYTHON": "OFF" if args.no_python else "ON", + "TIMEMORY_USE_LIKWID": "ON" if args.likwid else "OFF", "TIMEMORY_USE_GOTCHA": "ON" if args.gotcha else "OFF", + "TIMEMORY_USE_PYTHON": "ON" if args.python else "OFF", "TIMEMORY_USE_CALIPER": "ON" if args.caliper else "OFF", "TIMEMORY_USE_COVERAGE": "ON" if args.coverage else "OFF", "TIMEMORY_USE_SANITIZER": "OFF", "TIMEMORY_USE_CLANG_TIDY": "ON" if args.static_analysis else "OFF", - "USE_PAPI": "OFF" if args.no_papi else "ON", - "USE_MPI": "OFF" if args.no_mpi else "ON", + "USE_PAPI": "ON" if args.papi else "OFF", + "USE_MPI": "ON" if args.mpi else "OFF", "USE_CALIPER": "ON" if args.caliper else "OFF", } - if not args.no_mpi: + if args.mpi and args.tools: build_opts["TIMEMORY_BUILD_MPIP"] = "ON" if args.mpip else "OFF" - if not args.no_c: - pyct.BUILD_NAME = "{} C".format(pyct.BUILD_NAME) - - if not args.no_python: + if args.python: pyver = "{}.{}.{}".format( sys.version_info[0], sys.version_info[1], sys.version_info[2]) pyct.BUILD_NAME = "{} PY-{}".format(pyct.BUILD_NAME, pyver) @@ -202,10 +196,10 @@ def run_pyctest(): if args.arch: pyct.BUILD_NAME = "{} ARCH".format(pyct.BUILD_NAME) - if not args.no_mpi: + if args.mpi: pyct.BUILD_NAME = "{} MPI".format(pyct.BUILD_NAME) - if not args.no_papi: + if args.papi: pyct.BUILD_NAME = "{} PAPI".format(pyct.BUILD_NAME) if args.cuda: @@ -226,6 +220,9 @@ def run_pyctest(): if args.tau: pyct.BUILD_NAME = "{} TAU".format(pyct.BUILD_NAME) + if args.likwid: + pyct.BUILD_NAME = "{} LIKWID".format(pyct.BUILD_NAME) + if args.profile is not None: build_opts["TIMEMORY_USE_GPERF"] = "ON" components = "profiler" if args.profile == "cpu" else "tcmalloc" @@ -342,8 +339,6 @@ def construct_command(cmd, args): clobber=clobber_notes) # make sure all subsequent iterations don't clobber clobber_notes = False - # else: - # _cmd.append("{}/timem".format(pyct.BINARY_DIRECTORY)) _cmd.extend(cmd) return _cmd @@ -358,11 +353,9 @@ def construct_roofline_command(cmd, dir, extra_opts=[]): _cmd.extend(cmd) return _cmd - #--------------------------------------------------------------------------# # create tests # - test_env = ";".join(["CPUPROFILE_FREQUENCY=200", "CPUPROFILE_REALTIME=1", "CALI_CONFIG_PROFILE=runtime-report", @@ -516,31 +509,32 @@ def construct_roofline_command(cmd, dir, extra_opts=[]): "TIMEOUT": "300", "ENVIRONMENT": test_env}) - pyct.test(construct_name("test-cpu-roofline"), - construct_roofline_command(["./ex_cpu_roofline"], 'cpu-roofline', - ['-t', 'cpu_roofline']), - {"WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, - "LABELS": pyct.PROJECT_NAME, - "TIMEOUT": "900", - "ENVIRONMENT": test_env}) - - pyct.test(construct_name("test-cpu-roofline.sp"), - construct_roofline_command(["./ex_cpu_roofline.sp"], 'cpu-roofline.sp', - ['-t', 'cpu_roofline']), - {"WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, - "LABELS": pyct.PROJECT_NAME, - "TIMEOUT": "900", - "ENVIRONMENT": test_env}) + if not args.python: + pyct.test(construct_name("test-cpu-roofline"), + construct_roofline_command(["./ex_cpu_roofline"], 'cpu-roofline', + ['-t', 'cpu_roofline']), + {"WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, + "LABELS": pyct.PROJECT_NAME, + "TIMEOUT": "900", + "ENVIRONMENT": test_env}) - if args.cupti: - pyct.test(construct_name("test-gpu-roofline"), - construct_roofline_command(["./ex_gpu_roofline"], 'gpu-roofline', - ['-t', 'gpu_roofline']), + pyct.test(construct_name("test-cpu-roofline.sp"), + construct_roofline_command(["./ex_cpu_roofline.sp"], 'cpu-roofline.sp', + ['-t', 'cpu_roofline']), {"WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, "LABELS": pyct.PROJECT_NAME, "TIMEOUT": "900", "ENVIRONMENT": test_env}) + if args.cupti: + pyct.test(construct_name("test-gpu-roofline"), + construct_roofline_command(["./ex_gpu_roofline"], 'gpu-roofline', + ['-t', 'gpu_roofline']), + {"WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, + "LABELS": pyct.PROJECT_NAME, + "TIMEOUT": "900", + "ENVIRONMENT": test_env}) + pyct.generate_config(pyct.BINARY_DIRECTORY) pyct.generate_test_file(os.path.join(pyct.BINARY_DIRECTORY, "tests")) if not args.generate: diff --git a/source/tests/external/gotcha_tests_lib.cpp b/source/tests/external/gotcha_tests_lib.cpp index a545db145..e3e132f71 100644 --- a/source/tests/external/gotcha_tests_lib.cpp +++ b/source/tests/external/gotcha_tests_lib.cpp @@ -99,6 +99,31 @@ DoWork::execute_fp8(int64_t nitr) //--------------------------------------------------------------------------------------// +void +DoWork::execute_fp(int64_t nitr, std::vector fvals, + const std::deque& dvals) +{ + float fret = 0.0; + for(const auto& itr : fvals) + { + fret += ext::work( + nitr, [](float val) -> float { return cosf(val); }, + [&](float val, int64_t i) -> float { return val + itr * i; }); + } + std::get<0>(m_tuple) = fret; + + double dret = 0.0; + for(const auto& itr : dvals) + { + dret += ext::work( + nitr, [](double val) -> double { return cos(val); }, + [&](double val, int64_t i) -> double { return val + itr * i; }); + } + std::get<1>(m_tuple) = dret; +} + +//--------------------------------------------------------------------------------------// + std::tuple DoWork::get() const { diff --git a/source/tests/external/gotcha_tests_lib.hpp b/source/tests/external/gotcha_tests_lib.hpp index 9ed4b3179..85f0d0701 100644 --- a/source/tests/external/gotcha_tests_lib.hpp +++ b/source/tests/external/gotcha_tests_lib.hpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include namespace ext { @@ -43,6 +45,7 @@ class DoWork void execute_fp4(int64_t); void execute_fp8(int64_t); + void execute_fp(int64_t, std::vector, const std::deque&); std::tuple get() const; friend std::ostream& operator<<(std::ostream& os, const DoWork& obj) diff --git a/source/tests/gotcha_tests.cpp b/source/tests/gotcha_tests.cpp index eddf7e4a3..cb8ffd12a 100644 --- a/source/tests/gotcha_tests.cpp +++ b/source/tests/gotcha_tests.cpp @@ -49,7 +49,7 @@ using gotcha_hybrid_t = tim::auto_hybrid; // create gotcha types for various bundles of functions using mpi_gotcha_t = tim::component::gotcha<1, gotcha_hybrid_t>; using work_gotcha_t = tim::component::gotcha<1, gotcha_hybrid_t, int>; -using memfun_gotcha_t = tim::component::gotcha<3, gotcha_tuple_t>; +using memfun_gotcha_t = tim::component::gotcha<5, gotcha_tuple_t>; using comp_t = component_tuple; using tuple_t = component_tuple; @@ -483,6 +483,12 @@ TEST_F(gotcha_tests, member_functions) TIMEMORY_CXX_GOTCHA(memfun_gotcha_t, 2, &DoWork::execute_fp8); } + { + using func_t = decltype(&DoWork::execute_fp); + print_func_info(TIMEMORY_STRINGIZE(DoWork::execute_fp)); + + TIMEMORY_CXX_GOTCHA(memfun_gotcha_t, 3, &DoWork::execute_fp); + } }; float fsum = 0.0; @@ -492,29 +498,77 @@ TEST_F(gotcha_tests, member_functions) DoWork dw(pair_type(0.25, 0.5)); - auto _nitr = nitr / 8; - for(int i = 0; i < _nitr; ++i) + auto _nitr = nitr / 10; + int64_t ntot = 0; + for(int i = 0; i < _nitr; i += 10) { + ntot += 10; if(i >= (_nitr - 10)) { - dw.execute_fp4(1000); - dw.execute_fp8(1000); + for(int j = 0; j < 10; ++j) + { + dw.execute_fp4(1000); + dw.execute_fp8(1000); + auto ret = dw.get(); + fsum += std::get<0>(ret); + dsum += std::get<1>(ret); + } } else { - auto _fp4 = [&]() { dw.execute_fp4(1000); }; - auto _fp8 = [&]() { dw.execute_fp8(1000); }; + auto _fp4 = [&]() { + for(int j = 0; j < 10; ++j) + { + dw.execute_fp4(1000); + auto ret = dw.get(); + fsum += std::get<0>(ret); + } + }; + + auto _fp8 = [&]() { + for(int j = 0; j < 10; ++j) + { + dw.execute_fp8(1000); + auto ret = dw.get(); + dsum += std::get<1>(ret); + } + }; + std::thread t4(_fp4); std::thread t8(_fp8); t4.join(); t8.join(); } + } + int rank = tim::dmp::rank(); + if(rank == 0) + { + printf("\n"); + printf("[%i]> single-precision sum = %8.2f\n", rank, fsum); + printf("[%i]> double-precision sum = %8.2f\n", rank, dsum); + } + + float fsum2 = 0.0; + double dsum2 = 0.0; + for(int64_t i = 0; i < ntot; ++i) + { + dw.execute_fp(1000, { 0.25 }, { 0.5 }); auto ret = dw.get(); - fsum += std::get<0>(ret); - dsum += std::get<1>(ret); + fsum2 += std::get<0>(ret); + dsum2 += std::get<1>(ret); + } + + if(rank == 0) + { + printf("\n"); + printf("[%i]> single-precision sum2 = %8.2f\n", rank, fsum2); + printf("[%i]> double-precision sum2 = %8.2f\n", rank, dsum2); } + + ASSERT_NEAR(fsum2, fsum, tolerance); + ASSERT_NEAR(dsum2, dsum, tolerance); } auto rank = tim::mpi::rank(); @@ -537,9 +591,9 @@ TEST_F(gotcha_tests, member_functions) auto real_final_size = real_storage->get().size(); printf("[final]> wall-clock storage size: %li\n", (long int) real_final_size); - ASSERT_NEAR(fsum, -302122.44, tolerance); - ASSERT_NEAR(dsum, +110193.87, tolerance); - ASSERT_EQ(real_final_size, 4 + real_init_size); + ASSERT_NEAR(fsum, -241718.61, tolerance); + ASSERT_NEAR(dsum, +88155.09, tolerance); + ASSERT_EQ(real_final_size, 5 + real_init_size); } //======================================================================================// diff --git a/source/tests/priority_tests.cpp b/source/tests/priority_tests.cpp index 277a899fd..e818897b9 100644 --- a/source/tests/priority_tests.cpp +++ b/source/tests/priority_tests.cpp @@ -40,13 +40,13 @@ using namespace tim::component; // make different types to access and change traits individually -template -struct test_clock : public base> +template +struct test_clock : public base> { using ratio_t = std::nano; using value_type = int64_t; - using this_type = test_clock; - using base_type = base, value_type>; + using this_type = test_clock; + using base_type = base; using string_t = std::string; // since this is a template class, need these statements @@ -77,12 +77,16 @@ struct test_clock : public base> void start() { + if(StartSleep) + std::this_thread::sleep_for(std::chrono::milliseconds(500)); set_started(); value = record(); } void stop() { + if(StopSleep) + std::this_thread::sleep_for(std::chrono::milliseconds(500)); auto tmp = record(); accum += (tmp - value); value = tmp; @@ -90,8 +94,8 @@ struct test_clock : public base> } }; -using priority_start_wc = test_clock<0>; -using priority_stop_wc = test_clock<1>; +using priority_start_wc = test_clock<0, false, true>; +using priority_stop_wc = test_clock<1, true, false>; //--------------------------------------------------------------------------------------// @@ -112,10 +116,10 @@ template <> struct uses_timing_units : public std::true_type {}; template <> -struct start_priority : public std::true_type +struct start_priority : public std::integral_constant {}; template <> -struct stop_priority : public std::true_type +struct stop_priority : public std::integral_constant {}; } // namespace trait } // namespace tim @@ -125,10 +129,9 @@ struct stop_priority : public std::true_type using tuple_t = tim::component_tuple; -using prior_start_t = tuple_t::prior_start_t; -using prior_stop_t = tuple_t::prior_stop_t; -using stand_start_t = tuple_t::stand_start_t; -using stand_stop_t = tuple_t::stand_stop_t; +using plus_t = typename tuple_t::operation_t; +using start_t = typename tuple_t::operation_t; +using stop_t = typename tuple_t::operation_t; using apply_v = tim::apply; @@ -167,6 +170,10 @@ class priority_tests : public ::testing::Test TEST_F(priority_tests, simple_check) { + std::cout << "plus : " << tim::demangle() << "\n"; + std::cout << "start : " << tim::demangle() << "\n"; + std::cout << "stop : " << tim::demangle() << "\n"; + tuple_t t(details::get_test_name(), true); // start/stop all to check laps @@ -223,43 +230,17 @@ TEST_F(priority_tests, simple_check) TEST_F(priority_tests, start_stop) { - // lambdas to ensure inline - auto priority_start = [](tuple_t& t) { apply_v::access(t.data()); }; - auto priority_stop = [](tuple_t& t) { apply_v::access(t.data()); }; - auto standard_start = [](tuple_t& t) { apply_v::access(t.data()); }; - auto standard_stop = [](tuple_t& t) { apply_v::access(t.data()); }; + std::cout << "plus : " << tim::demangle() << "\n"; + std::cout << "start : " << tim::demangle() << "\n"; + std::cout << "stop : " << tim::demangle() << "\n"; tuple_t t(details::get_test_name(), true); - // start/stop all to check laps t.start(); - t.stop(); - - t.get().start(); - - do_sleep(250); // TOTAL TIME: 0.25 seconds - - priority_start(t); - - do_sleep(250); // TOTAL TIME: 0.50 seconds - - standard_start(t); - - do_sleep(500); // TOTAL TIME: 1.00 seconds - priority_stop(t); + do_sleep(500); // TOTAL TIME: 0.50 seconds - do_sleep(125); // TOTAL TIME: 1.125 seconds - - standard_stop(t); - - do_sleep(125); // TOTAL TIME: 1.25 seconds - - t.get().stop(); - - // t.start(); - // details::consume(500); - // t.stop(); + t.stop(); auto& native_wc = t.get(); auto& pstart_wc = t.get(); @@ -273,13 +254,13 @@ TEST_F(priority_tests, start_stop) std::cout << t << std::endl; printf("\n"); - double native_exp = 1.25; - double pstart_exp = 0.875; + double native_exp = 1.0; + double pstart_exp = 1.5; double pstop_exp = 0.5; + ASSERT_NEAR(native_exp, native_wc.get(), 0.125); ASSERT_NEAR(pstart_exp, pstart_wc.get(), 0.125); ASSERT_NEAR(pstop_exp, pstop_wc.get(), 0.125); - ASSERT_NEAR(native_exp, native_wc.get(), 0.125); } //--------------------------------------------------------------------------------------// diff --git a/source/timemory/components/base.hpp b/source/timemory/components/base.hpp index 505df2f31..ba57bbaf5 100644 --- a/source/timemory/components/base.hpp +++ b/source/timemory/components/base.hpp @@ -522,11 +522,14 @@ struct base obj += rhs; obj.plus(rhs); Type::append(graph_itr, rhs); - _storage->pop(); - _storage->stack_pop(&rhs); - - auto _end_depth = _storage->depth(); - depth_change = (_beg_depth > _end_depth); + if(_storage) + { + _storage->pop(); + _storage->stack_pop(&rhs); + + auto _end_depth = _storage->depth(); + depth_change = (_beg_depth > _end_depth); + } } obj.is_running = false; is_on_stack = false; diff --git a/source/timemory/mpl/apply.hpp b/source/timemory/mpl/apply.hpp index 5589da377..e894745b6 100644 --- a/source/timemory/mpl/apply.hpp +++ b/source/timemory/mpl/apply.hpp @@ -132,6 +132,86 @@ struct apply { using _Ret = void; + //----------------------------------------------------------------------------------// + + template + struct out_of_order_T; + + template