diff --git a/.appveyor.yml b/.appveyor.yml index d78d26880..24f2b755c 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -42,45 +42,23 @@ environment: USE_ARCH: "OFF" USE_PYTHON: "ON" BUILD_C: "ON" + BUILD_SHARED: "ON" BUILD_STATIC: "OFF" BUILD_TESTING: "ON" LINE_PROFILER: "ON" - - CONDA: 37 - CPP: 14 - CONFIG: RelWithDebInfo - EXAMPLES: "OFF" - TOOLS: "OFF" - WINSOCK: "OFF" - USE_ARCH: "OFF" - USE_PYTHON: "ON" - BUILD_C: "OFF" - BUILD_STATIC: "ON" - BUILD_TESTING: "OFF" - LINE_PROFILER: "ON" - - PYTHON: 36 + - PYTHON: 37 CPP: 14 CONFIG: Debug EXAMPLES: "ON" - TOOLS: "OFF" - WINSOCK: "OFF" - USE_ARCH: "OFF" - USE_PYTHON: "ON" - BUILD_C: "ON" - BUILD_STATIC: "OFF" - BUILD_TESTING: "OFF" - LINE_PROFILER: "OFF" - - PYTHON: 37 - CPP: 17 - CONFIG: MinSizeRel - EXAMPLES: "OFF" TOOLS: "ON" WINSOCK: "OFF" USE_ARCH: "OFF" USE_PYTHON: "ON" BUILD_C: "OFF" + BUILD_SHARED: "OFF" BUILD_STATIC: "ON" BUILD_TESTING: "OFF" - LINE_PROFILER: "ON" + LINE_PROFILER: "OFF" install: # Configure environment @@ -109,7 +87,7 @@ install: $env:PYTHON_EXE = "C:\Python$env:PYTHON\python.exe" } python -m pip install --disable-pip-version-check --user --upgrade pip wheel - python -m pip install --user cython numpy matplotlib pillow pandas pydot + python -m pip install --user matplotlib numpy pillow cython six pandas pydot multiprocess } elseif ($env:CONDA) { if ($env:PLATFORM -eq "x64") { $env:PATH = "C:\Miniconda$env:CONDA-x64;C:\Miniconda$env:CONDA-x64\Scripts;$env:PATH" @@ -122,8 +100,8 @@ install: $env:PYTHONHOME = "C:\Miniconda$env:CONDA" $env:PYTHON_EXE = "C:\Miniconda$env:CONDA\python.exe" } - conda update -y -q -n base conda - conda install -y -c defaults -c conda-forge -q pip setuptools scikit-build numpy matplotlib pillow cython pandas pydot + conda update -y -n base conda + conda install -y -c conda-forge -c defaults matplotlib numpy pillow cython six pandas pydot multiprocess } $env:TIMEMORY_FILE_OUTPUT = "OFF" $env:TIMEMORY_AUTO_OUTPUT = "ON" @@ -134,8 +112,9 @@ build_script: - mkdir build-timemory - cd build-timemory - cmake .. 
-G "%CMAKE_GENERATOR%" -A "%CMAKE_ARCH%" - -DBUILD_SHARED_LIBS=ON + -DBUILD_SHARED_LIBS="%BUILD_SHARED%" -DBUILD_STATIC_LIBS="%BUILD_STATIC%" + -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE="%CONFIG%" -DCMAKE_CXX_STANDARD="%CPP%" -DCMAKE_INSTALL_PREFIX=..\install-timemory diff --git a/.readthedocs.yml b/.readthedocs.yml index 4a011eba4..cae0dce97 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -6,7 +6,7 @@ conda: python: setup_py_install: False - version: 3.6 + version: 3.7 build: image: latest diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e042b95c..d2cb7eaad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # timemory +## Version 3.2.2 + +> Date: Wed Jul 14 20:42:29 2021 -0500 + +- Python gotcha fixes + - Fixed issues with mallocp segfaulting from Python + - Fixed storage merge() segfaulting +- New Python tools submodule (timemory.tools) + - tools.function_wrappers combines {start,stop}_{mpip,ompt,ncclp,mallocp} + into one configurable handle and provides decorator + context-manager features +- New Python functions which are used within tools.function_wrappers + - timemory.start_function_wrappers + - timemory.stop_function_wrappers +- Fixed timemory-python-line-profiler script calling timemory.profiler +- API change in ring_buffer template + - read/write member functions return pointer to object read/written to + instead of bytes +- API change in storage and tsettings + - Classes are declared as final to optimize any vtable calls +- Removed runtime_configurable restriction for do_enumerator_generate + - This enables user_bundles to be used again in Python +- Added operation::python_class_name +- Updated examples: + - ex_python_bindings (and libex_python_bindings) +- Fix to get_hash_identifier +- Removed concurrency comparison when generating a diff b/t two runs +- Fixed issues with popen.cpp guarding with TIMEMORY_WINDOWS but never defined + ## Version 3.2.1 > Date: Fri Jul 9 16:55:33 2021 -0500 diff --git a/VERSION b/VERSION index e4604e3af..be94e6f53 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.1 +3.2.2 diff --git a/docs/conf.py b/docs/conf.py index 5c0034efe..da5c3210f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -81,6 +81,9 @@ def build_doxy_docs(): "-DENABLE_DOXYGEN_LATEX_DOCS=OFF", "-DENABLE_DOXYGEN_MAN_DOCS=OFF", "-DTIMEMORY_BUILD_KOKKOS_TOOLS=ON", + "-DTIMEMORY_BUILD_C=OFF", + "-DTIMEMORY_BUILD_CUDA=OFF", + "-DTIMEMORY_BUILD_FORTRAN=OFF", _srcdir, ] ) diff --git a/docs/environment.yml b/docs/environment.yml index 104ad8630..593a60d26 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - python=3.6 + - python=3.7 - cmake - curl - doxygen @@ -18,4 +18,5 @@ dependencies: - setuptools - breathe - sphinx-markdown-tables + - docutils \ No newline at end of file diff --git a/docs/getting_started/integrating.md b/docs/getting_started/integrating.md index a240a1c15..6e1a05788 100644 --- a/docs/getting_started/integrating.md +++ b/docs/getting_started/integrating.md @@ -42,6 +42,7 @@ were not available when timemory was installed. 
| `timemory::timemory-compile-extra` | Extra optimization flags | | `timemory::timemory-compile-options` | Adds the standard set of compiler flags used by timemory | | `timemory::timemory-compile-timing` | Adds compiler flags which report compilation timing metrics | +| `timemory::timemory-compiler-instrument-compile-options` | INTERFACE | | `timemory::timemory-compiler-instrument` | Provides library for compiler instrumentation | | `timemory::timemory-coverage` | Enables code-coverage flags | | `timemory::timemory-cpu-roofline` | Enables flags and libraries for proper CPU roofline generation | @@ -69,6 +70,7 @@ were not available when timemory was installed. | `timemory::timemory-hidden-visibility` | Adds -fvisibility=hidden compiler flag | | `timemory::timemory-instrument-functions` | Adds compiler flags to enable compile-time instrumentation | | `timemory::timemory-leak-sanitizer` | Adds compiler flags to enable leak sanitizer (-fsanitize=leak) | +| `timemory::timemory-libunwind` | Enables libunwind support | | `timemory::timemory-likwid` | Enables LIKWID support | | `timemory::timemory-lto` | Adds link-time-optimization flags | | `timemory::timemory-mallocp-library` | Provides MALLOCP library for tracking memory allocations | @@ -79,6 +81,7 @@ were not available when timemory was installed. | `timemory::timemory-ncclp-library` | Provides NCCLP library for NCCL performance analysis | | `timemory::timemory-no-mpi-init` | Disables the generation of MPI_Init and MPI_Init_thread symbols | | `timemory::timemory-null-sanitizer` | Adds compiler flags to enable null sanitizer (-fsanitize=null) | +| `timemory::timemory-nvml` | Enables NVML support (NVIDIA) | | `timemory::timemory-ompt-library` | Provides OMPT library for OpenMP performance analysis | | `timemory::timemory-ompt` | Enables OpenMP-tools support | | `timemory::timemory-papi-static` | Enables PAPI support + links to static library | diff --git a/docs/tools/timemory-compiler-instrument/README.md b/docs/tools/timemory-compiler-instrument/README.md index 117acbaec..85fcef782 100644 --- a/docs/tools/timemory-compiler-instrument/README.md +++ b/docs/tools/timemory-compiler-instrument/README.md @@ -39,6 +39,10 @@ In other words, `"TIMEMORY_FLAT_PROFILE=ON"` will not be applied to the compiler to enable flat profiling for the compiler instrumentation, set `"TIMEMORY_COMPILER_FLAT_PROFILE=ON"`, and so on for `"TIMEMORY_COMPILER_OUTPUT_PATH=..."`, etc. +> **NOTE:** Environment variables `TIMEMORY_COMPILER_MAX_DEPTH`, `TIMEMORY_COMPILER_THROTTLE_COUNT`, and `TIMEMORY_COMPILER_THROTTLE_VALUE` +> can be very useful for reducing the overhead of the instrumentation. For more information, see the descriptions provided via +> `timemory-avail -Sd -r 'THROTTLE|MAX_DEPTH'`. + ## Build Timemory provides a `timemory::timemory-compiler-instrument` target in CMake which provides the necessary diff --git a/examples/ex-custom-dynamic-instr/ex_custom_dynamic_instr.cpp b/examples/ex-custom-dynamic-instr/ex_custom_dynamic_instr.cpp index f7f36db90..2d54261e0 100644 --- a/examples/ex-custom-dynamic-instr/ex_custom_dynamic_instr.cpp +++ b/examples/ex-custom-dynamic-instr/ex_custom_dynamic_instr.cpp @@ -23,6 +23,7 @@ // SOFTWARE. 
// +#include "timemory/components/papi/papi_tuple.hpp" #include "timemory/library.h" #include "timemory/timemory.hpp" diff --git a/examples/ex-optional/ex_optional.cpp b/examples/ex-optional/ex_optional.cpp index 864454d6f..e00f68948 100644 --- a/examples/ex-optional/ex_optional.cpp +++ b/examples/ex-optional/ex_optional.cpp @@ -123,6 +123,9 @@ int main(int argc, char** argv) ret_sum += ret * ret; } + // avoid set but unused warning + if(ret_sum < 0) printf("sum: %li\n", ret_sum); + std::vector ret_reduce; std::vector ret_send; for(size_t i = 0; i < fibvalues.size(); ++i) diff --git a/examples/ex-python/ex_bindings.py b/examples/ex-python/ex_bindings.py index 3c6b2a47d..89001dfeb 100755 --- a/examples/ex-python/ex_bindings.py +++ b/examples/ex-python/ex_bindings.py @@ -1,6 +1,6 @@ #!@PYTHON_EXECUTABLE@ - +import sys import numpy import argparse @@ -8,12 +8,15 @@ try: import mpi4py # noqa: F401 from mpi4py import MPI # noqa: F401 + from mpi4py.MPI import Exception as MPIException # noqa: F401 except ImportError: use_mpi = False + MPIException = RuntimeError pass import timemory # noqa: E402 from timemory.profiler import profile # noqa: E402 +from timemory.tools import function_wrappers # noqa: E402 import libex_python_bindings as ex_bindings # noqa: E402 if use_mpi: @@ -35,37 +38,48 @@ def run_profile(nitr=100, nsize=1000000): def run_mpi(nitr=100, nsize=1000000): - if size != 2: - return + if use_mpi is False: + _sum = 0.0 + for i in range(nitr): + data = numpy.arange(nsize, dtype="i") + _val = numpy.sum(data) + _sum += 1.0 / _val + data = numpy.arange(nsize, dtype=numpy.float64) + _val = numpy.sum(data) + _sum += 1.0 / _val + msgs = set() for i in range(nitr): # passing MPI datatypes explicitly - if rank == 0: - data = numpy.arange(nsize, dtype="i") - comm.Send([data, MPI.INT], dest=1, tag=77) - elif rank == 1: - data = numpy.empty(nsize, dtype="i") - comm.Recv([data, MPI.INT], source=0, tag=77) + try: + if rank == 0: + data = numpy.arange(nsize, dtype="i") + comm.Send([data, MPI.INT], dest=1, tag=77) + elif rank == 1: + data = numpy.empty(nsize, dtype="i") + comm.Recv([data, MPI.INT], source=0, tag=77) + except MPIException as e: + msgs.add(f"{e}") # automatic MPI datatype discovery - if rank == 0: - data = numpy.empty(nsize, dtype=numpy.float64) - comm.Recv(data, source=1, tag=13) - elif rank == 1: - data = numpy.arange(nsize, dtype=numpy.float64) - comm.Send(data, dest=0, tag=13) - + try: + if rank == 0 and size == 2: + data = numpy.empty(nsize, dtype=numpy.float64) + comm.Recv(data, source=1, tag=13) + elif rank == 1: + data = numpy.arange(nsize, dtype=numpy.float64) + comm.Send(data, dest=0, tag=13) + except MPIException as e: + msgs.add(f"{e}") -def main(args): - # start MPI wrappers - id = timemory.start_mpip() + for i, itr in enumerate(msgs): + sys.stderr.write("{}: {}\n".format(i, itr)) - run_mpi(args.iterations) - ans = run_profile(args.iterations, args.size) - # stop MPI wrappers - timemory.stop_mpip(id) - return ans +def main(args): + # start function wrappers (MPI, OpenMP, etc. 
if available) + with function_wrappers(*args.profile, nccl=False): + return run_profile(args.iterations, args.size) if __name__ == "__main__": @@ -74,7 +88,6 @@ def main(args): parser.add_argument( "-i", "--iterations", - required=False, default=100, type=int, help="Iterations", @@ -82,32 +95,53 @@ def main(args): parser.add_argument( "-n", "--size", - required=False, default=1000000, type=int, help="Array size", ) + parser.add_argument( + "-c", + "--components", + default=[ + "wall_clock", + "peak_rss", + "cpu_clock", + "cpu_util", + "thread_cpu_clock", + "thread_cpu_util", + ], + type=str, + help="Additional components", + nargs="*", + ) + parser.add_argument( + "-p", + "--profile", + default=["mpi", "openmp", "malloc"], + choices=("mpi", "openmp", "malloc", "nccl"), + type=str, + help="Profiling library wrappers to activate", + nargs="*", + ) args = parser.parse_args() timemory.enable_signal_detection() - timemory.settings.width = 12 - timemory.settings.precision = 6 + timemory.settings.width = 8 + timemory.settings.precision = 2 + timemory.settings.scientific = True timemory.settings.plot_output = True - timemory.settings.dart_output = True + timemory.settings.dart_output = False + timemory.timemory_init([__file__]) - with profile( - [ - "wall_clock", - "user_clock", - "system_clock", - "cpu_util", - "peak_rss", - "thread_cpu_clock", - "thread_cpu_util", - ] - ): + @function_wrappers(*args.profile, nccl=False) + def runner(nitr, nsize): + run_mpi(nitr, nsize) + + runner(args.iterations, args.size) + + with profile(args.components): ans = main(args) - print("Answer = {}".format(ans)) + print("Success! Answer = {}. Finalizing...".format(ans)) timemory.finalize() print("Python Finished") diff --git a/examples/ex-python/libex_bindings.cpp b/examples/ex-python/libex_bindings.cpp index 525115812..f30bd2707 100644 --- a/examples/ex-python/libex_bindings.cpp +++ b/examples/ex-python/libex_bindings.cpp @@ -32,6 +32,14 @@ #include "pybind11/pytypes.h" #include "pybind11/stl.h" +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -87,31 +95,29 @@ allreduce(const vector_t& sendbuf) //--------------------------------------------------------------------------------------// double -run(int nitr, int nsize) -{ - rng.seed(54561434UL); +run(int nitr, int nsize); - printf("[%s] Running MPI algorithm with %i iterations and %i entries...\n", __func__, - nitr, nsize); +//--------------------------------------------------------------------------------------// - double dsum = 0.0; -#pragma omp parallel for - for(int i = 0; i < nitr; ++i) - { - auto dsendbuf = generate(nsize); - auto drecvbuf = allreduce(dsendbuf); - auto dtmp = std::accumulate(drecvbuf.begin(), drecvbuf.end(), 0.0); -#pragma omp atomic - dsum += dtmp; - } - return dsum; -} +void +scatter_gather(int num_elements_per_proc); + +//--------------------------------------------------------------------------------------// + +std::vector +create_rand_nums(int num_elements); + +//--------------------------------------------------------------------------------------// + +double +compute_avg(const std::vector& array); //--------------------------------------------------------------------------------------// PYBIND11_MODULE(libex_python_bindings, ex) { auto _run = [](int nitr, int nsize) { + auto value = 0.0; try { py::gil_scoped_release release; @@ -120,13 +126,16 @@ PYBIND11_MODULE(libex_python_bindings, ex) MPI_Comm_size(MPI_COMM_WORLD, &nrank); 
omp_set_num_threads(std::thread::hardware_concurrency() / nrank); #endif - return run(nitr, nsize); + value = run(nitr, nsize); } catch(std::exception& e) { fprintf(stderr, "Error! %s\n", e.what()); throw; } - return 0.0; + + scatter_gather(nitr * nsize); + + return value; }; ex.def("run", _run, "Run a calculation", py::arg("nitr") = 10, @@ -134,3 +143,134 @@ PYBIND11_MODULE(libex_python_bindings, ex) } //--------------------------------------------------------------------------------------// + +double +run(int nitr, int nsize) +{ + rng.seed(54561434UL); + + printf("[%s] Running MPI algorithm with %i iterations and %i entries...\n", __func__, + nitr, nsize); + + double dsum = 0.0; +#pragma omp parallel for + for(int i = 0; i < nitr; ++i) + { + auto dsendbuf = generate(nsize); + auto drecvbuf = allreduce(dsendbuf); + auto dtmp = std::accumulate(drecvbuf.begin(), drecvbuf.end(), 0.0); +#pragma omp atomic + dsum += dtmp; + } + return dsum; +} + +void +scatter_gather(int num_elements_per_proc) +{ + if(num_elements_per_proc == 0) + return; + +#if defined(USE_MPI) || defined(TIMEMORY_USE_MPI) + auto n = num_elements_per_proc; + + // Seed the random number generator to get different results each time + srand(time(NULL)); + + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + // decide who is the master rank + int master_rank = rand() % world_size; + if(num_elements_per_proc % 2 == 0) + { + int root = 0; + if(world_rank == root) + { + // If we are the root process, send our data to everyone + for(int i = 0; i < world_size; i++) + { + if(i != world_rank) + MPI_Send(&master_rank, 1, MPI_INT, i, n % world_size, MPI_COMM_WORLD); + } + } + else + { + // If we are a receiver process, receive the data from the root + MPI_Recv(&master_rank, 1, MPI_INT, root, n % world_size, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + } + else + { + MPI_Barrier(MPI_COMM_WORLD); + MPI_Bcast(&master_rank, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Barrier(MPI_COMM_WORLD); + } + + if(world_rank == master_rank) + printf("Master rank: %i, Number of elements per process: %i\n", master_rank, + num_elements_per_proc); + + // Create a random array of elements on the root process. Its total + // size will be the number of elements per process times the number + // of processes + std::vector rand_nums; + if(world_rank == master_rank) + rand_nums = create_rand_nums(num_elements_per_proc * world_size); + + // For each process, create a buffer that will hold a subset of the entire array + std::vector sub_rand_nums(num_elements_per_proc, 0.0); + + // Scatter the random numbers from the root process to all processes in the MPI world + MPI_Scatter(rand_nums.data(), num_elements_per_proc, MPI_DOUBLE, sub_rand_nums.data(), + num_elements_per_proc, MPI_DOUBLE, master_rank, MPI_COMM_WORLD); + + // Compute the average of your subset + double sub_avg = compute_avg(sub_rand_nums); + + // Gather all partial averages down to the root process + std::vector sub_avgs; + if(world_rank == master_rank) + sub_avgs.resize(world_size, 0.0); + + MPI_Gather(&sub_avg, 1, MPI_DOUBLE, sub_avgs.data(), 1, MPI_DOUBLE, master_rank, + MPI_COMM_WORLD); + + // Now that we have all of the partial averages on the root, compute the + // total average of all numbers. Since we are assuming each process computed + // an average across an equal amount of elements, this computation will + // produce the correct answer. 
+ if(world_rank == master_rank) + { + double avg = compute_avg(sub_avgs); + double original_data_avg = compute_avg(rand_nums); + printf("Avg of all elements is %10.8f, Avg computed across original data is " + "%10.8f\n", + avg, original_data_avg); + } + + MPI_Barrier(MPI_COMM_WORLD); +#else + (void) compute_avg(create_rand_nums(num_elements_per_proc)); +#endif +} + +// Creates an array of random numbers. Each number has a value from 0 - 1 +std::vector +create_rand_nums(int num_elements) +{ + std::vector rand_nums(num_elements, 0.0); + for(auto& itr : rand_nums) + itr = (rand() / (double) RAND_MAX); + return rand_nums; +} + +// Computes the average of an array of numbers +double +compute_avg(const std::vector& array) +{ + return std::accumulate(array.begin(), array.end(), 0.0) / array.size(); +} diff --git a/pyctest-runner.py b/pyctest-runner.py index 7855637f4..b16938ec5 100755 --- a/pyctest-runner.py +++ b/pyctest-runner.py @@ -1436,56 +1436,43 @@ def add_timem_test(name, cmd): ) if args.python: + _bindings_cmd = [sys.executable, "./ex_python_bindings"] if dmprun is not None: - pyct.test( - construct_name("ex-python-bindings-ref"), - construct_command( - [dmprun] - + dmpargs - + [ - sys.executable, - "./ex_python_bindings", - ], - args, + _bindings_cmd = [dmprun] + dmpargs + _bindings_cmd + + pyct.test( + construct_name("ex-python-bindings-ref"), + construct_command(_bindings_cmd, args), + { + "WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, + "LABELS": pyct.PROJECT_NAME, + "TIMEOUT": "120", + "ENVIRONMENT": ";".join( + [ + base_env, + "TIMEMORY_OUTPUT_PATH=timemory-ex-python-bindings-output-ref", + ] ), - { - "WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, - "LABELS": pyct.PROJECT_NAME, - "TIMEOUT": "120", - "ENVIRONMENT": ";".join( - [ - base_env, - "TIMEMORY_OUTPUT_PATH=timemory-ex-python-bindings-output-ref", - ] - ), - }, - ) + }, + ) - pyct.test( - construct_name("ex-python-bindings"), - construct_command( - [dmprun] - + dmpargs - + [ - sys.executable, - "./ex_python_bindings", - ], - args, + pyct.test( + construct_name("ex-python-bindings"), + construct_command(_bindings_cmd, args), + { + "WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, + "LABELS": pyct.PROJECT_NAME, + "TIMEOUT": "120", + "DEPENDS": construct_name("ex-python-bindings-ref"), + "ENVIRONMENT": ";".join( + [ + base_env, + "TIMEMORY_INPUT_PATH=timemory-ex-python-bindings-output-ref", + "TIMEMORY_DIFF_OUTPUT=ON", + ] ), - { - "WORKING_DIRECTORY": pyct.BINARY_DIRECTORY, - "LABELS": pyct.PROJECT_NAME, - "TIMEOUT": "120", - "DEPENDS": construct_name("ex-python-bindings-ref"), - "ENVIRONMENT": ";".join( - [ - base_env, - "TIMEMORY_INPUT_PATH=timemory-ex-python-bindings-output-ref", - "TIMEMORY_DIFF_OUTPUT=ON", - ] - ), - }, - ) + }, + ) if args.caliper: pyct.test( diff --git a/source/python/README.md b/source/python/README.md index e1dc08b35..86ed5eeb1 100644 --- a/source/python/README.md +++ b/source/python/README.md @@ -24,7 +24,7 @@ PACKAGE CONTENTS component (package) ert (package) hardware_counters (package) - libpytimemory + libs (package) line_profiler (package) mpi (package) mpi_support (package) @@ -46,12 +46,12 @@ SUBMODULES CLASSES pybind11_builtins.pybind11_object(builtins.object) - timemory.libpytimemory.auto_timer - timemory.libpytimemory.component_bundle - timemory.libpytimemory.manager - timemory.libpytimemory.rss_usage - timemory.libpytimemory.settings - timemory.libpytimemory.timer + timemory.libs.auto_timer + timemory.libs.component_bundle + timemory.libs.manager + timemory.libs.rss_usage + timemory.libs.settings + 
timemory.libs.timer class auto_timer(...) class component_bundle(...) @@ -151,7 +151,7 @@ FUNCTIONS Enable/disable timemory DATA - __all__ = ['version_info', 'build_info', 'version', 'libpytimemory', '... + __all__ = ['version_info', 'build_info', 'version', 'libs', '... __copyright__ = 'Copyright 2020, The Regents of the University of Cali... __email__ = 'jrmadsen@lbl.gov' __license__ = 'MIT' diff --git a/source/python/libpytimemory-components.cpp b/source/python/libpytimemory-components.cpp index 0595cd99a..d73c67f2d 100644 --- a/source/python/libpytimemory-components.cpp +++ b/source/python/libpytimemory-components.cpp @@ -37,11 +37,29 @@ #include "libpytimemory-components.hpp" #include "timemory/components/extern.hpp" #include "timemory/enum.h" +#include "timemory/operations/types/python_class_name.hpp" #include "timemory/timemory.hpp" +#include +#include + namespace pyinternal { // +struct cinfo +{ + bool available = false; + std::string class_name = {}; + std::string id = {}; + std::set ids = {}; + + template + auto find(Tp&& _v) const + { + return ids.find(std::forward(_v)); + } +}; +// //--------------------------------------------------------------------------------------// /// variadic wrapper around each component allowing to to accept arguments that it /// doesn't actually accept and implement functions it does not actually implement @@ -49,53 +67,13 @@ template using pytuple_t = tim::lightweight_tuple; /// a python object generator function via a string ID using keygen_t = std::function; -/// pairs a set of matching strings to a generator function -using keyset_t = std::pair, keygen_t>; +/// component info paired to a generator function +using keyset_t = std::pair; /// a python object generator function via an enumeration ID using indexgen_t = std::function; // //--------------------------------------------------------------------------------------// // -static inline std::string -get_class_name(std::string id) -{ - static const std::set delim{ - '_', - '-', - }; - - if(id.empty()) - return std::string{}; - - id = tim::settings::tolower(id); - - // capitalize after every delimiter - for(size_t i = 0; i < id.size(); ++i) - { - if(i == 0) - id.at(i) = toupper(id.at(i)); - else - { - if(delim.find(id.at(i)) != delim.end() && i + 1 < id.length()) - { - id.at(i + 1) = toupper(id.at(i + 1)); - ++i; - } - } - } - // remove all delimiters - for(auto ditr : delim) - { - size_t _pos = 0; - while((_pos = id.find(ditr)) != std::string::npos) - id = id.erase(_pos, 1); - } - - return id; -} -// -//--------------------------------------------------------------------------------------// -// template ::value, int> = 0> static inline auto @@ -654,7 +632,7 @@ generate(py::module& _pymod, std::array& _boolgen, static_assert(property_t::specialized(), "Error! 
Missing specialization"); - std::string id = get_class_name(property_t::enum_string()); + std::string id = tim::operation::python_class_name{}(); std::string cid = property_t::id(); auto _init = []() { return new bundle_t{}; }; @@ -730,7 +708,8 @@ generate(py::module& _pymod, std::array& _boolgen, std::set _keys = property_t::ids(); _keys.insert(id); _boolgen[Idx] = true; - _keygen[Idx] = { _keys, []() { return py::cast(new bundle_t{}); } }; + _keygen[Idx] = { cinfo{ true, id, cid, _keys }, + []() { return py::cast(new bundle_t{}); } }; auto idx = static_cast(Idx); _pycomp.def_static("index", [idx]() { return idx; }, @@ -758,7 +737,7 @@ generate(py::module& _pymod, std::array& _boolgen, return; using property_t = tim::component::properties; using bundle_t = pytuple_t; - std::string id = get_class_name(property_t::enum_string()); + std::string id = tim::operation::python_class_name{}(); std::string cid = property_t::id(); std::string _desc = "not available"; @@ -808,8 +787,10 @@ generate(py::module& _pymod, std::array& _boolgen, _pycomp.def_property_readonly_static("has_value", _false, "Whether the component has an accessible value"); + std::set _keys = property_t::ids(); + _keys.insert(id); _boolgen[Idx] = false; - _keygen[Idx] = { {}, []() { return py::none{}; } }; + _keygen[Idx] = { cinfo{ false, id, cid, _keys }, []() { return py::none{}; } }; auto idx = static_cast(Idx); _pycomp.def_static("index", [idx]() { return idx; }, @@ -841,22 +822,6 @@ components(py::module& _pymod, std::array& _boolgen, TIMEMORY_FOLD_EXPRESSION(pyinternal::generate(_pymod, _boolgen, _keygen)); } // -//--------------------------------------------------------------------------------------// -// -template -static auto -get_available(std::index_sequence) -{ - constexpr size_t N = sizeof...(Idx); - std::array _avail_array; - _avail_array.fill(false); - TIMEMORY_FOLD_EXPRESSION( - _avail_array[Idx] = - tim::component::enumerator::value && - !tim::concepts::is_placeholder>::value); - return _avail_array; -} -// } // namespace pyinternal // //======================================================================================// @@ -883,13 +848,33 @@ generate(py::module& _pymod) pyinternal::components(_pycomp, _boolgen, _keygen, std::make_index_sequence{}); - auto _is_available = [](py::object _obj) { + std::vector> _available_and_names{}; + std::vector _names{}; + std::vector _available_names{}; + + _available_and_names.reserve(N); + _names.reserve(N); + _available_names.reserve(N); + + for(const auto& itr : _keygen) + { + _available_and_names.emplace_back(itr.first.available, itr.first.id); + if(itr.first.id.empty()) + continue; + _names.emplace_back(itr.first.id); + if(itr.first.available) + _available_names.emplace_back(itr.first.id); + } + + _available_and_names.shrink_to_fit(); + _available_names.shrink_to_fit(); + _names.shrink_to_fit(); + + auto _is_available = [_available_and_names](py::object _obj) { auto _enum_val = pytim::get_enum(_obj); if(_enum_val >= TIMEMORY_COMPONENTS_END) return false; - static auto _available = pyinternal::get_available( - tim::make_index_sequence{}); - return _available.at(static_cast(_enum_val)); + return _available_and_names.at(static_cast(_enum_val)).first; }; auto _keygenerator = [_keygen, _boolgen](std::string _key) { @@ -899,7 +884,7 @@ generate(py::module& _pymod) { if(!_boolgen[i++]) continue; - if(itr.first.find(_key) != itr.first.end()) + if(itr.first.ids.find(_key) != itr.first.ids.end()) return itr.second; } pyinternal::keygen_t _nogen = []() -> py::object { return 
py::none{}; }; @@ -926,6 +911,11 @@ generate(py::module& _pymod) _pycomp.def("get_generator", _indexgenerator, "Get a functor for generating the component whose enumeration ID (see " "`help(timemory.component.id)`) match the given enumeration ID"); + _pycomp.def("get_types", [_names]() { return _names; }, + "Get the names of all the component types regardless of whether " + "available or not"); + _pycomp.def("get_available_types", [_available_names]() { return _available_names; }, + "Get the names of all the component types which are available"); return _pycomp; } diff --git a/source/python/libpytimemory.cpp b/source/python/libpytimemory.cpp index ca88f56f0..e225a2742 100644 --- a/source/python/libpytimemory.cpp +++ b/source/python/libpytimemory.cpp @@ -237,7 +237,7 @@ PYBIND11_MODULE(libpytimemory, tim) auto _exit_action = [=](int nsig) { if(_master_manager) { - std::cout << "Finalizing after signal: " << nsig << " :: " + std::cerr << "Finalizing after signal: " << nsig << " :: " << tim::signal_settings::str( static_cast(nsig)) << std::endl; @@ -470,7 +470,7 @@ PYBIND11_MODULE(libpytimemory, tim) using tuple_type = tim::convert_t>; auto json_str = manager_t::get_storage::serialize(_types); if(tim::settings::debug()) - std::cout << "JSON CLASSIC:\n" << json_str << std::endl; + std::cerr << "JSON CLASSIC:\n" << json_str << std::endl; return json_str; }; //----------------------------------------------------------------------------------// @@ -485,7 +485,7 @@ PYBIND11_MODULE(libpytimemory, tim) oa->finishNode(); } if(tim::settings::debug()) - std::cout << "JSON HIERARCHY:\n" << ss.str() << std::endl; + std::cerr << "JSON HIERARCHY:\n" << ss.str() << std::endl; return ss.str(); }; //----------------------------------------------------------------------------------// @@ -797,6 +797,151 @@ PYBIND11_MODULE(libpytimemory, tim) //----------------------------------------------------------------------------------// tim.def("stop_mallocp", &timemory_stop_mallocp, "Deactivate Memory Allocation profiling", py::arg("id")); + //----------------------------------------------------------------------------------// + enum PyProfilingIndex + { + PyProfilingIndex_MPIP = 0, + PyProfilingIndex_OMPT, + PyProfilingIndex_NCCLP, + PyProfilingIndex_MALLOCP, + PyProfilingIndex_END + }; + //----------------------------------------------------------------------------------// + using PyProfilingIndex_array_t = std::array; + using strset_t = std::set; + //----------------------------------------------------------------------------------// + std::map PyProfilingIndex_names = { + { PyProfilingIndex_MPIP, strset_t{ "mpip", "mpi" } }, + { PyProfilingIndex_OMPT, strset_t{ "ompt", "openmp" } }, + { PyProfilingIndex_NCCLP, strset_t{ "ncclp", "nccl" } }, + { PyProfilingIndex_MALLOCP, strset_t{ "mallocp", "malloc", "memory" } } + }; + //----------------------------------------------------------------------------------// + tim.def( + "start_function_wrappers", + [PyProfilingIndex_names](py::args _args, py::kwargs _kwargs) { + PyProfilingIndex_array_t _ret{}; + _ret.fill(0); + + if(!_args && !_kwargs) + return _ret; + + std::map _data{}; + auto _get_str = [](py::handle itr) { + return tim::settings::tolower(itr.cast()); + }; + + if(_args) + { + for(auto itr : _args) + _data.emplace(_get_str(itr), true); + } + + if(_kwargs) + { + for(auto itr : _kwargs) + _data.emplace(_get_str(itr.first), itr.second.cast()); + } + + for(const auto& itr : _data) + { + const auto& _key = itr.first; + const auto& _value = itr.second; + 
DEBUG_PRINT_HERE("starting %s : %s", _key.c_str(), + (_value) ? "yes" : "no"); + if(PyProfilingIndex_names.at(PyProfilingIndex_MPIP).count(_key) > 0) + { + if(_value) + _ret.at(PyProfilingIndex_MPIP) = timemory_start_mpip(); + } + else if(PyProfilingIndex_names.at(PyProfilingIndex_OMPT).count(_key) > 0) + { + if(_value) + _ret.at(PyProfilingIndex_OMPT) = timemory_start_ompt(); + } + else if(PyProfilingIndex_names.at(PyProfilingIndex_NCCLP).count(_key) > 0) + { + if(_value) + _ret.at(PyProfilingIndex_NCCLP) = timemory_start_ncclp(); + } + else if(PyProfilingIndex_names.at(PyProfilingIndex_MALLOCP).count(_key) > + 0) + { + if(_value) + _ret.at(PyProfilingIndex_MALLOCP) = timemory_start_mallocp(); + } + else + { + std::stringstream _msg{}; + _msg << "Error! Unknown profiling mode: \"" << _key + << "\". Acceptable identifiers: "; + std::stringstream _opts{}; + for(const auto& pitr : PyProfilingIndex_names) + { + for(const auto& eitr : pitr.second) + _opts << ", " << eitr; + } + _msg << _opts.str().substr(2); + throw std::runtime_error(_msg.str()); + } + } + + if(tim::settings::debug()) + { + std::cerr << "[start_function_wrappers]> values :"; + for(auto itr : _ret) + std::cerr << " " << itr; + std::cerr << std::endl; + } + return _ret; + }, + "Start profiling MPI (mpip), OpenMP (ompt), NCCL (ncclp), and/or memory " + "allocations (mallocp). Example: start_function_wrappers(mpi=True, ...)"); + //----------------------------------------------------------------------------------// + tim.def("stop_function_wrappers", + [PyProfilingIndex_names](PyProfilingIndex_array_t _arg) { + auto _print_arg = [=, &_arg](const std::string& _label) { + std::cerr << "[stop_function_wrappers|" << _label << "]> values :"; + for(auto itr : _arg) + std::cerr << " " << itr; + std::cerr << std::endl; + }; + auto _print_entry = [=, &_arg](uint64_t _idx) { + std::cerr << "stopping " << (*PyProfilingIndex_names.at(_idx).begin()) + << " : " << _arg.at(_idx) << std::endl; + }; + + if(tim::settings::debug()) + _print_arg("input"); + for(size_t i = 0; i < _arg.size(); ++i) + { + if(tim::settings::debug()) + _print_entry(i); + switch(i) + { + case PyProfilingIndex_MPIP: + _arg.at(i) = timemory_stop_mpip(_arg.at(i)); + break; + case PyProfilingIndex_OMPT: + _arg.at(i) = timemory_stop_ompt(_arg.at(i)); + break; + case PyProfilingIndex_NCCLP: + _arg.at(i) = timemory_stop_ncclp(_arg.at(i)); + break; + case PyProfilingIndex_MALLOCP: + _arg.at(i) = timemory_stop_mallocp(_arg.at(i)); + break; + case PyProfilingIndex_END: break; + } + } + if(tim::settings::debug()) + _print_arg("return"); + return _arg; + }, + "Stop profiling MPI (mpip), OpenMP (ompt), NCCL (ncclp), and/or memory " + "allocations (mallocp). Provide return value from " + "start_function_wrappers(mpi=True, " + "...)"); //==================================================================================// // @@ -932,17 +1077,17 @@ PYBIND11_MODULE(libpytimemory, tim) auto _handle_data = [&](std::string str) { if(tim::settings::debug() || tim::settings::verbose() > 2) - std::cout << "[timemory-socket][server]> received: " << str << std::endl; + std::cerr << "[timemory-socket][server]> received: " << str << std::endl; _results.emplace_back(std::move(str)); }; if(tim::settings::debug() || tim::settings::verbose() > 2) - std::cout << "[timemory-socket][server]> started listening..." << std::endl; + std::cerr << "[timemory-socket][server]> started listening..." 
<< std::endl; tim::socket::manager{}.listen(_name, _port, _handle_data, _max_packets); if(tim::settings::debug() || tim::settings::verbose() > 2) - std::cout << "[timemory-socket][server]> stopped listening..." << std::endl; + std::cerr << "[timemory-socket][server]> stopped listening..." << std::endl; return _results; }; diff --git a/source/timemory/components/gotcha/backends.hpp b/source/timemory/components/gotcha/backends.hpp index 9f9d06398..b1a732941 100644 --- a/source/timemory/components/gotcha/backends.hpp +++ b/source/timemory/components/gotcha/backends.hpp @@ -47,65 +47,6 @@ namespace component { // //======================================================================================// -// -class gotcha_suppression -{ -private: - template - friend struct gotcha; - - template - struct gotcha_invoker; - - template - friend struct operation::init_storage; - - template - friend struct user_bundle; - - friend struct opaque; - - static bool& get() - { - static thread_local bool _instance = false; - return _instance; - } - -public: - struct auto_toggle - { - explicit auto_toggle(bool& _value, bool _if_equal = false) - : m_value(_value) - , m_if_equal(_if_equal) - { - if(m_value == m_if_equal) - { - m_value = !m_value; - m_did_toggle = true; - } - } - - ~auto_toggle() - { - if(m_value != m_if_equal && m_did_toggle) - { - m_value = !m_value; - } - } - - auto_toggle(const auto_toggle&) = delete; - auto_toggle(auto_toggle&&) = delete; - auto_toggle& operator=(const auto_toggle&) = delete; - auto_toggle& operator=(auto_toggle&&) = delete; - - private: - bool& m_value; - bool m_if_equal; - bool m_did_toggle = false; - }; -}; -// -//======================================================================================// /// /// \struct tim::component::gotcha_invoker /// diff --git a/source/timemory/components/gotcha/components.hpp b/source/timemory/components/gotcha/components.hpp index 62c953497..37a6fd1a1 100644 --- a/source/timemory/components/gotcha/components.hpp +++ b/source/timemory/components/gotcha/components.hpp @@ -320,6 +320,11 @@ struct gotcha if(!_data.filled) { auto _label = demangle(_func); + + // ensure the hash to string pairing is stored + storage_type::instance()->add_hash_id(_func); + storage_type::instance()->add_hash_id(_label); + if(_tool.length() > 0 && _label.find(_tool + "/") != 0) { _label = _tool + "/" + _label; @@ -886,6 +891,26 @@ struct gotcha //----------------------------------------------------------------------------------// + static inline void toggle_suppress_on(bool* _bsuppress, bool& _did) + { + if(_bsuppress && *_bsuppress == false) + { + *(_bsuppress) = true; + _did = true; + } + } + + static inline void toggle_suppress_off(bool* _bsuppress, bool& _did) + { + if(_bsuppress && _did == true && *_bsuppress == true) + { + *(_bsuppress) = false; + _did = false; + } + } + + //----------------------------------------------------------------------------------// + template static TIMEMORY_NOINLINE Ret wrap(Args... 
_args) { @@ -893,22 +918,34 @@ struct gotcha #if defined(TIMEMORY_USE_GOTCHA) auto& _data = get_data()[N]; - // PRINT_HERE("%s", _data.tool_id.c_str()); - static constexpr bool void_operator = std::is_same::value; static_assert(void_operator, "operator_type should be void!"); + // protects against TLS calling malloc when malloc is wrapped + static bool _protect_tls_alloc = false; - typedef Ret (*func_t)(Args...); + using func_t = Ret (*)(Args...); func_t _orig = (func_t)(gotcha_get_wrappee(_data.wrappee)); - if(_data.is_finalized) - return (_orig) ? (*_orig)(_args...) : Ret{}; + if(!_orig) + { + PRINT_HERE("nullptr to original function! wrappee: %s", + _data.tool_id.c_str()); + return Ret{}; + } + if(_data.is_finalized || _protect_tls_alloc) + return (*_orig)(_args...); + + _protect_tls_alloc = true; auto _suppress = gotcha_suppression::get() || (_data.suppression && *_data.suppression); + _protect_tls_alloc = false; + if(!_data.ready || _suppress) { + _protect_tls_alloc = true; static thread_local bool _recursive = false; + _protect_tls_alloc = false; if(!_recursive && _data.debug && *_data.debug) { _recursive = true; @@ -921,62 +958,40 @@ struct gotcha fflush(stderr); _recursive = false; } - return (_orig) ? (*_orig)(_args...) : Ret{}; + return (*_orig)(_args...); } bool did_data_toggle = false; bool did_glob_toggle = false; - auto toggle_suppress_on = [](bool* _bsuppress, bool& _did) { - if(_bsuppress && *_bsuppress == false) - { - *(_bsuppress) = true; - _did = true; - } - }; - - auto toggle_suppress_off = [](bool* _bsuppress, bool& _did) { - if(_bsuppress && _did == true && *_bsuppress == true) - { - *(_bsuppress) = false; - _did = false; - } - }; - - if(_orig) - { - // make sure the function is not recursively entered - // (important for allocation-based wrappers) - _data.ready = false; - toggle_suppress_on(_data.suppression, did_data_toggle); - - // bundle_type is always: component_{tuple,list,bundle} - toggle_suppress_on(&gotcha_suppression::get(), did_glob_toggle); - // - bundle_type _obj{ _data.tool_id }; - _obj.construct(_args...); - _obj.start(); - _obj.audit(_data, audit::incoming{}, _args...); - toggle_suppress_off(&gotcha_suppression::get(), did_glob_toggle); + // make sure the function is not recursively entered + // (important for allocation-based wrappers) + _data.ready = false; + toggle_suppress_on(_data.suppression, did_data_toggle); - _data.ready = true; - Ret _ret = invoke(_obj, _orig, std::forward(_args)...); - _data.ready = false; + // bundle_type is always: component_{tuple,list,bundle} + toggle_suppress_on(&gotcha_suppression::get(), did_glob_toggle); + // + bundle_type _obj{ _data.tool_id }; + _obj.construct(_args...); + _obj.start(); + _obj.audit(_data, audit::incoming{}, _args...); + toggle_suppress_off(&gotcha_suppression::get(), did_glob_toggle); - toggle_suppress_on(&gotcha_suppression::get(), did_glob_toggle); - _obj.audit(_data, audit::outgoing{}, _ret); - _obj.stop(); - toggle_suppress_off(&gotcha_suppression::get(), did_glob_toggle); + _data.ready = true; + Ret _ret = invoke(_obj, _orig, std::forward(_args)...); + _data.ready = false; - // allow re-entrance into wrapper - toggle_suppress_off(_data.suppression, did_data_toggle); - _data.ready = true; + toggle_suppress_on(&gotcha_suppression::get(), did_glob_toggle); + _obj.audit(_data, audit::outgoing{}, _ret); + _obj.stop(); + toggle_suppress_off(&gotcha_suppression::get(), did_glob_toggle); - return _ret; - } + // allow re-entrance into wrapper + toggle_suppress_off(_data.suppression, did_data_toggle); 
+ _data.ready = true; - if(_data.debug && *_data.debug) - PRINT_HERE("%s", "nullptr to original function!"); + return _ret; #else consume_parameters(_args...); PRINT_HERE("%s", "should not be here!"); @@ -993,25 +1008,37 @@ struct gotcha #if defined(TIMEMORY_USE_GOTCHA) auto& _data = get_data()[N]; - // PRINT_HERE("%s", _data.tool_id.c_str()); - static constexpr bool void_operator = std::is_same::value; static_assert(void_operator, "operator_type should be void!"); + // protects against TLS calling malloc when malloc is wrapped + static bool _protect_tls_alloc = false; + + using func_t = void (*)(Args...); + auto _orig = (func_t)(gotcha_get_wrappee(_data.wrappee)); - auto _orig = (void (*)(Args...)) gotcha_get_wrappee(_data.wrappee); + if(!_orig) + { + PRINT_HERE("nullptr to original function! wrappee: %s", + _data.tool_id.c_str()); + return; + } - if(_data.is_finalized) + if(_data.is_finalized || _protect_tls_alloc) { - if(_orig) - (*_orig)(_args...); + (*_orig)(_args...); return; } + _protect_tls_alloc = true; auto _suppress = gotcha_suppression::get() || (_data.suppression && *_data.suppression); + _protect_tls_alloc = false; + if(!_data.ready || _suppress) { + _protect_tls_alloc = true; static thread_local bool _recursive = false; + _protect_tls_alloc = false; if(!_recursive && _data.debug && *_data.debug) { _recursive = true; @@ -1024,63 +1051,38 @@ struct gotcha fflush(stderr); _recursive = false; } - if(_orig) - (*_orig)(_args...); + (*_orig)(_args...); return; } bool did_data_toggle = false; bool did_glob_toggle = false; - auto toggle_suppress_on = [](bool* _bsuppress, bool& _did) { - if(_bsuppress && *_bsuppress == false) - { - *(_bsuppress) = true; - _did = true; - } - }; - - auto toggle_suppress_off = [](bool* _bsuppress, bool& _did) { - if(_bsuppress && _did == true && *_bsuppress == true) - { - *(_bsuppress) = false; - _did = false; - } - }; - // make sure the function is not recursively entered // (important for allocation-based wrappers) _data.ready = false; toggle_suppress_on(_data.suppression, did_data_toggle); toggle_suppress_on(&gotcha_suppression::get(), did_glob_toggle); - if(_orig) - { - // - bundle_type _obj{ _data.tool_id }; - _obj.construct(_args...); - _obj.start(); - _obj.audit(_data, audit::incoming{}, _args...); - toggle_suppress_off(&gotcha_suppression::get(), did_glob_toggle); + // + bundle_type _obj{ _data.tool_id }; + _obj.construct(_args...); + _obj.start(); + _obj.audit(_data, audit::incoming{}, _args...); + toggle_suppress_off(&gotcha_suppression::get(), did_glob_toggle); - _data.ready = true; - invoke(_obj, _orig, std::forward(_args)...); - _data.ready = false; + _data.ready = true; + invoke(_obj, _orig, std::forward(_args)...); + _data.ready = false; - toggle_suppress_on(&gotcha_suppression::get(), did_glob_toggle); - _obj.audit(_data, audit::outgoing{}); - _obj.stop(); - } - else if(_data.debug && *_data.debug) - { - PRINT_HERE("%s", "nullptr to original function!"); - } + toggle_suppress_on(&gotcha_suppression::get(), did_glob_toggle); + _obj.audit(_data, audit::outgoing{}); + _obj.stop(); // allow re-entrance into wrapper toggle_suppress_off(&gotcha_suppression::get(), did_glob_toggle); toggle_suppress_off(_data.suppression, did_data_toggle); _data.ready = true; - #else consume_parameters(_args...); PRINT_HERE("%s", "should not be here!"); diff --git a/source/timemory/components/gotcha/suppression.hpp b/source/timemory/components/gotcha/suppression.hpp new file mode 100644 index 000000000..239b384f9 --- /dev/null +++ 
b/source/timemory/components/gotcha/suppression.hpp @@ -0,0 +1,96 @@ +// MIT License +// +// Copyright (c) 2020, The Regents of the University of California, +// through Lawrence Berkeley National Laboratory (subject to receipt of any +// required approvals from the U.S. Dept. of Energy). All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "timemory/macros/attributes.hpp" +#include "timemory/mpl/concepts.hpp" // component::gotcha forward decl + +#include + +namespace tim +{ +namespace component +{ +// +class gotcha_suppression +{ +private: + template + friend struct gotcha; + + static TIMEMORY_NOINLINE bool& get() + { + static thread_local bool _instance = false; + return _instance; + } + +public: + struct auto_toggle + { + explicit auto_toggle(bool& _value, bool _if_equal = false); + auto_toggle(std::false_type); + auto_toggle(std::true_type); + ~auto_toggle(); + auto_toggle(const auto_toggle&) = delete; + auto_toggle(auto_toggle&&) = delete; + auto_toggle& operator=(const auto_toggle&) = delete; + auto_toggle& operator=(auto_toggle&&) = delete; + + private: + bool& m_value; + bool m_if_equal; + bool m_did_toggle = false; + }; +}; +// +inline gotcha_suppression::auto_toggle::auto_toggle(bool& _value, bool _if_equal) +: m_value{ _value } +, m_if_equal{ _if_equal } +{ + if(m_value == m_if_equal) + { + m_value = !m_value; + m_did_toggle = true; + } +} +// +inline gotcha_suppression::auto_toggle::auto_toggle(std::false_type) +: auto_toggle{ get(), false } +{} +// +inline gotcha_suppression::auto_toggle::auto_toggle(std::true_type) +: auto_toggle{ get(), true } +{} +// +inline gotcha_suppression::auto_toggle::~auto_toggle() +{ + if(m_value != m_if_equal && m_did_toggle) + { + m_value = !m_value; + } +} +// +} // namespace component +} // namespace tim diff --git a/source/timemory/components/gotcha/types.hpp b/source/timemory/components/gotcha/types.hpp index fea9305cc..c0688c1dc 100644 --- a/source/timemory/components/gotcha/types.hpp +++ b/source/timemory/components/gotcha/types.hpp @@ -24,6 +24,7 @@ #pragma once +#include "timemory/components/gotcha/suppression.hpp" #include "timemory/components/macros.hpp" #include "timemory/enum.h" #include "timemory/mpl/type_traits.hpp" diff --git a/source/timemory/hash/definition.hpp b/source/timemory/hash/definition.hpp index 628c03b61..8da039dd5 100644 --- a/source/timemory/hash/definition.hpp +++ b/source/timemory/hash/definition.hpp @@ -33,6 +33,7 @@ #include #include #include 
+#include #include #include #include @@ -114,14 +115,11 @@ TIMEMORY_HASH_LINKAGE(std::string) get_hash_identifier(const hash_map_ptr_t& _hash_map, const hash_alias_ptr_t& _hash_alias, hash_value_t _hash_id) { - auto _map_itr = _hash_map->find(_hash_id); - auto _alias_itr = _hash_alias->find(_hash_id); - + auto _map_itr = _hash_map->find(_hash_id); if(_map_itr != _hash_map->end()) - { return _map_itr->second; - } + auto _alias_itr = _hash_alias->find(_hash_id); if(_alias_itr != _hash_alias->end()) { _map_itr = _hash_map->find(_alias_itr->second); @@ -129,24 +127,84 @@ get_hash_identifier(const hash_map_ptr_t& _hash_map, const hash_alias_ptr_t& _ha return _map_itr->second; } + for(const auto& aitr : *_hash_alias) + { + if(_hash_id == aitr.first) + { + for(const auto& mitr : *_hash_map) + { + if(mitr.first == aitr.second) + { + fprintf(stderr, + "[%s@%s:%i]> found hash identifier %llu in alias map via " + "iteration after uomap->find failed! This might be an ABI or " + "an integer overflow problem\n", + __FUNCTION__, + TIMEMORY_TRUNCATED_FILE_STRING(__FILE__).c_str(), __LINE__, + (unsigned long long) _hash_id); + return mitr.second; + } + } + } + } + + for(const auto& mitr : *_hash_map) + { + if(_hash_id == mitr.first) + { + fprintf(stderr, + "[%s@%s:%i]> found hash identifier %llu in hash map via iteration " + "after uomap->find failed! This might be an ABI or an integer " + "overflow problem\n", + __FUNCTION__, TIMEMORY_TRUNCATED_FILE_STRING(__FILE__).c_str(), + __LINE__, (unsigned long long) _hash_id); + return mitr.second; + } + } + if(_hash_id > 0) { std::stringstream ss; ss << "Error! node with hash " << _hash_id - << " does not have an associated string!"; -# if defined(DEBUG) - ss << "\nHash map:\n"; - auto _w = 30; - for(const auto& itr : *_hash_map) - ss << " " << std::setw(_w) << itr.first << " : " << (itr.second) << "\n"; - if(_hash_alias->size() > 0) + << " does not have an associated string!\n"; + static std::set _reported{}; + if(_reported.count(_hash_id) == 0) { - ss << "Alias hash map:\n"; - for(const auto& itr : *_hash_alias) - ss << " " << std::setw(_w) << itr.first << " : " << itr.second << "\n"; + _reported.emplace(_hash_id); + bool _found_direct = (_hash_map->find(_hash_id) != _hash_map->end()); + ss << " Found in map : " << std::boolalpha << _found_direct << '\n'; + bool _found_alias = (_hash_alias->find(_hash_id) != _hash_alias->end()); + ss << " Found in alias map : " << std::boolalpha << _found_alias << '\n'; + if(_found_alias) + { + auto aitr = _hash_alias->find(_hash_id); + ss << " Found aliasing : " << aitr->first << " -> " << aitr->second + << '\n'; + auto mitr = _hash_map->find(aitr->second); + if(mitr != _hash_map->end()) + ss << " Found mapping : " << mitr->first << " -> " << mitr->second + << '\n'; + else + ss << " Missing mapping\n"; + } + else + { + ss << " Missing aliasing\n"; + } + ss << " Hash map:\n"; + auto _w = 20; + for(const auto& itr : *_hash_map) + ss << " " << std::setw(_w) << itr.first << " : " << (itr.second) + << "\n"; + if(_hash_alias->size() > 0) + { + ss << " Alias hash map:\n"; + for(const auto& itr : *_hash_alias) + ss << " " << std::setw(_w) << itr.first << " : " << itr.second + << "\n"; + } + fprintf(stderr, "%s", ss.str().c_str()); } -# endif - fprintf(stderr, "%s\n", ss.str().c_str()); } return std::string("unknown-hash=") + std::to_string(_hash_id); diff --git a/source/timemory/operations/types.hpp b/source/timemory/operations/types.hpp index e9517be25..ff1be89fc 100644 --- a/source/timemory/operations/types.hpp +++ 
b/source/timemory/operations/types.hpp @@ -1055,6 +1055,11 @@ struct dummy // //--------------------------------------------------------------------------------------// // +template +struct python_class_name; +// +//--------------------------------------------------------------------------------------// +// namespace finalize { // @@ -1305,28 +1310,26 @@ struct print protected: // do not lint misc-non-private-member-variables-in-classes - settings_t m_settings = settings::shared_instance(); // NOLINT - bool debug = false; // NOLINT - bool update = true; // NOLINT - bool json_forced = false; // NOLINT - bool node_init = dmp::is_initialized(); // NOLINT - int32_t node_rank = dmp::rank(); // NOLINT - int32_t node_size = dmp::size(); // NOLINT - int32_t verbose = 0; // NOLINT - int64_t max_depth = 0; // NOLINT - int64_t max_call_stack = std::numeric_limits::max(); // NOLINT - int64_t data_concurrency = 1; // NOLINT - int64_t input_concurrency = 1; // NOLINT - std::string label = ""; // NOLINT - std::string description = ""; // NOLINT - std::string text_outfname = ""; // NOLINT - std::string tree_outfname = ""; // NOLINT - std::string json_outfname = ""; // NOLINT - std::string json_inpfname = ""; // NOLINT - std::string text_diffname = ""; // NOLINT - std::string json_diffname = ""; // NOLINT - stream_type data_stream = stream_type{}; // NOLINT - stream_type diff_stream = stream_type{}; // NOLINT + settings_t m_settings = settings::shared_instance(); // NOLINT + bool debug = false; // NOLINT + bool update = true; // NOLINT + bool json_forced = false; // NOLINT + bool node_init = dmp::is_initialized(); // NOLINT + int32_t node_rank = dmp::rank(); // NOLINT + int32_t node_size = dmp::size(); // NOLINT + int32_t verbose = 0; // NOLINT + int64_t max_depth = 0; // NOLINT + int64_t max_call_stack = std::numeric_limits::max(); // NOLINT + std::string label = ""; // NOLINT + std::string description = ""; // NOLINT + std::string text_outfname = ""; // NOLINT + std::string tree_outfname = ""; // NOLINT + std::string json_outfname = ""; // NOLINT + std::string json_inpfname = ""; // NOLINT + std::string text_diffname = ""; // NOLINT + std::string json_diffname = ""; // NOLINT + stream_type data_stream = stream_type{}; // NOLINT + stream_type diff_stream = stream_type{}; // NOLINT }; // //--------------------------------------------------------------------------------------// @@ -1391,7 +1394,7 @@ struct print : public base::print if(file_output()) { if(json_output()) - print_json(json_outfname, node_results, data_concurrency); + print_json(json_outfname, node_results); if(tree_output()) print_tree(tree_outfname, node_tree); if(text_output()) @@ -1417,19 +1420,13 @@ struct print : public base::print if(file_output()) { if(json_output()) - print_json(json_diffname, node_delta, data_concurrency); + print_json(json_diffname, node_delta); if(text_output()) print_text(text_diffname, diff_stream); if(plot_output()) { std::stringstream ss; ss << "Difference vs. " << json_inpfname; - if(input_concurrency != data_concurrency) - { - auto delta_conc = (data_concurrency - input_concurrency); - ss << " with " << delta_conc << " " - << ((delta_conc > 0) ? 
"more" : "less") << "threads"; - } print_plot(json_diffname, ss.str()); } } @@ -1465,8 +1462,7 @@ struct print : public base::print TIMEMORY_COLD virtual void print_tree(const std::string& fname, result_tree& rt); TIMEMORY_COLD void write_stream(stream_type& stream, result_type& results); - TIMEMORY_COLD void print_json(const std::string& fname, result_type& results, - int64_t concurrency); + TIMEMORY_COLD void print_json(const std::string& fname, result_type& results); TIMEMORY_COLD const auto& get_data() const { return data; } TIMEMORY_COLD const auto& get_node_results() const { return node_results; } TIMEMORY_COLD const auto& get_node_input() const { return node_input; } diff --git a/source/timemory/operations/types/assemble.hpp b/source/timemory/operations/types/assemble.hpp index bb7b0bee9..bb0726cec 100644 --- a/source/timemory/operations/types/assemble.hpp +++ b/source/timemory/operations/types/assemble.hpp @@ -91,8 +91,7 @@ struct assemble TIMEMORY_DELETED_OBJECT(assemble) private: - using derived_tuple_t = typename trait::derivation_types::type; - static constexpr size_t derived_tuple_v = std::tuple_size::value; + using derived_tuple_t = typename trait::derivation_types::type; template using derived_t = typename std::tuple_element::type; @@ -100,7 +99,8 @@ struct assemble template explicit assemble(type& obj, Args&&... args); - template 0)> = 0> + template ::value, + std::enable_if_t<(N > 0)> = 0> explicit assemble(type& obj, Arg&& arg) { bool b = false; @@ -113,7 +113,7 @@ struct assemble explicit assemble(type& obj, BundleT& arg) { bool b = false; - constexpr auto N = derived_tuple_v; + constexpr auto N = std::tuple_size::value; sfinae(b, obj, make_index_sequence{}, arg); if(!b) sfinae(obj, 0, 0, arg); diff --git a/source/timemory/operations/types/derive.hpp b/source/timemory/operations/types/derive.hpp index e06bd3222..be1385425 100644 --- a/source/timemory/operations/types/derive.hpp +++ b/source/timemory/operations/types/derive.hpp @@ -89,8 +89,7 @@ struct derive TIMEMORY_DELETED_OBJECT(derive) private: - using derived_tuple_t = typename trait::derivation_types::type; - static constexpr size_t derived_tuple_v = std::tuple_size::value; + using derived_tuple_t = typename trait::derivation_types::type; template using derived_t = typename std::tuple_element::type; @@ -98,7 +97,8 @@ struct derive template explicit derive(type& obj, Args&&... args); - template = 0> + template ::value, + std::enable_if_t = 0> explicit derive(type& obj, Arg&& arg) { bool b = false; @@ -111,7 +111,7 @@ struct derive explicit derive(type& obj, BundleT& arg) { bool b = false; - constexpr auto N = derived_tuple_v; + constexpr auto N = std::tuple_size::value; sfinae(b, obj, make_index_sequence{}, arg); if(!b) sfinae(obj, 0, 0, arg); diff --git a/source/timemory/operations/types/finalize/print.hpp b/source/timemory/operations/types/finalize/print.hpp index 825136416..0bc9302fa 100644 --- a/source/timemory/operations/types/finalize/print.hpp +++ b/source/timemory/operations/types/finalize/print.hpp @@ -125,7 +125,6 @@ print::print(storage_type* _data, const settings_t& _settings) node_results = data->dmp_get(); if(tree_output()) node_tree = data->dmp_get(node_tree); - data_concurrency = data->instance_count().load(); dmp::barrier(); settings::indent_width(Tp::get_width()); @@ -247,12 +246,6 @@ print::setup() write_stream(diff_stream, node_delta); std::stringstream ss; ss << description << " vs. 
" << json_inpfname; - if(input_concurrency != data_concurrency) - { - auto delta_conc = (data_concurrency - input_concurrency); - ss << " with " << delta_conc << " " << ((delta_conc > 0) ? "more" : "less") - << "threads"; - } diff_stream->set_banner(ss.str()); } } @@ -364,7 +357,6 @@ print::update_data() node_results = data->dmp_get(); if(tree_output()) node_tree = data->dmp_get(node_tree); - data_concurrency = data->instance_count().load(); dmp::barrier(); if(m_settings->get_debug()) @@ -401,12 +393,6 @@ print::update_data() write_stream(diff_stream, node_delta); std::stringstream ss; ss << description << " vs. " << json_inpfname; - if(input_concurrency != data_concurrency) - { - auto delta_conc = (data_concurrency - input_concurrency); - ss << " with " << delta_conc << " " << ((delta_conc > 0) ? "more" : "less") - << "threads"; - } diff_stream->set_banner(ss.str()); } @@ -445,7 +431,7 @@ print::update_data() // template void -print::print_json(const std::string& outfname, result_type& results, int64_t) +print::print_json(const std::string& outfname, result_type& results) { using policy_type = policy::output_archive_t; if(outfname.length() > 0) diff --git a/source/timemory/operations/types/python_class_name.hpp b/source/timemory/operations/types/python_class_name.hpp new file mode 100644 index 000000000..707430474 --- /dev/null +++ b/source/timemory/operations/types/python_class_name.hpp @@ -0,0 +1,118 @@ +// MIT License +// +// Copyright (c) 2020, The Regents of the University of California, +// through Lawrence Berkeley National Laboratory (subject to receipt of any +// required approvals from the U.S. Dept. of Energy). All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include +#include +#include + +namespace tim +{ +// +namespace component +{ +template +struct properties; +} +// +namespace operation +{ +// +template +struct python_class_name; +// +//--------------------------------------------------------------------------------------// +// +/// +/// \struct tim::operation::python_class_name +/// \brief This class generates the class name for a component according to the standard +/// Python naming convention +/// +// +//--------------------------------------------------------------------------------------// +// +template <> +struct python_class_name +{ + std::string operator()(std::string id) const; +}; +// +template +struct python_class_name : python_class_name +{ + using type = Tp; + using base_type = python_class_name; + + using base_type::operator(); + std::string operator()() const + { + using properties_t = component::properties; + static_assert(properties_t::specialized(), + "Error! Cannot get python class name if the component properties " + "have not been specialized"); + return this->base_type::operator()(properties_t::enum_string()); + } +}; +// +//--------------------------------------------------------------------------------------// +// +inline std::string +python_class_name::operator()(std::string id) const +{ + if(id.empty()) + return std::string{}; + + for(auto& itr : id) + itr = ::tolower(itr); + + // capitalize after every delimiter + for(size_t i = 0; i < id.size(); ++i) + { + if(i == 0) + id.at(i) = ::toupper(id.at(i)); + else + { + if((id.at(i) == '_' || id.at(i) == '-') && i + 1 < id.length()) + { + id.at(i + 1) = ::toupper(id.at(i + 1)); + ++i; + } + } + } + // remove all delimiters + for(auto ditr : { '_', '-' }) + { + size_t _pos = 0; + while((_pos = id.find(ditr)) != std::string::npos) + id = id.erase(_pos, 1); + } + + return id; +} +// +//--------------------------------------------------------------------------------------// +// +} // namespace operation +} // namespace tim diff --git a/source/timemory/runtime/properties.hpp b/source/timemory/runtime/properties.hpp index be2cb0091..a6ef2853f 100644 --- a/source/timemory/runtime/properties.hpp +++ b/source/timemory/runtime/properties.hpp @@ -68,10 +68,8 @@ get_hash(std::string&& key) // //--------------------------------------------------------------------------------------// // -template > -enable_if_t::value && - !concepts::is_runtime_configurable::value, - void> +template +enable_if_t::value && I != V, void> do_enumerator_generate(std::vector& opaque_array, int idx, Args&&... args) { using type = component::enumerator_t; @@ -88,10 +86,8 @@ do_enumerator_generate(std::vector& opaque_array, int idx, Args&& // //--------------------------------------------------------------------------------------// // -template > -enable_if_t::value || - concepts::is_runtime_configurable::value, - void> +template +enable_if_t::value || I == V, void> do_enumerator_generate(std::vector&, int, Args&&...) {} // @@ -101,14 +97,13 @@ do_enumerator_generate(std::vector&, int, Args&&...) // //--------------------------------------------------------------------------------------// // -template > -enable_if_t::value && - !concepts::is_runtime_configurable::value, - void> +template +enable_if_t::value, void> do_enumerator_init(Tp& obj, int idx, Args&&... 
args) { using type = component::enumerator_t; - IF_CONSTEXPR(!concepts::is_placeholder::value) + IF_CONSTEXPR(!concepts::is_placeholder::value && + !std::is_same, type>::value) { if(idx == I) obj.template initialize(std::forward(args)...); @@ -117,10 +112,8 @@ do_enumerator_init(Tp& obj, int idx, Args&&... args) // //--------------------------------------------------------------------------------------// // -template > -enable_if_t::value || - concepts::is_runtime_configurable::value, - void> +template +enable_if_t::value, void> do_enumerator_init(Tp&, int, Args&&...) {} // @@ -133,8 +126,7 @@ do_enumerator_enumerate(component_match_vector_t& _vec, component_match_index_t& { using type = component::enumerator_t; constexpr auto _is_ph = concepts::is_placeholder::value; - constexpr auto _is_rt = concepts::is_runtime_configurable::value; - IF_CONSTEXPR(!_is_ph && !_is_rt) + IF_CONSTEXPR(!_is_ph) { std::string _id = component::properties::id(); if(_id != "TIMEMORY_COMPONENTS_END") @@ -170,9 +162,10 @@ template void enumerator_insert(Tp& obj, int idx, int_sequence, Args&&... args) { - std::vector opaque_array; - TIMEMORY_FOLD_EXPRESSION( - do_enumerator_generate(opaque_array, idx, std::forward(args)...)); + constexpr int TpV = component::properties::value; + std::vector opaque_array{}; + TIMEMORY_FOLD_EXPRESSION(do_enumerator_generate( + opaque_array, idx, std::forward(args)...)); for(auto&& itr : opaque_array) obj.insert(std::move(itr.first), std::move(itr.second)); } @@ -183,9 +176,10 @@ template void enumerator_configure(int idx, int_sequence, Args&&... args) { - std::vector opaque_array; - TIMEMORY_FOLD_EXPRESSION( - do_enumerator_generate(opaque_array, idx, std::forward(args)...)); + constexpr int TpV = component::properties::value; + std::vector opaque_array{}; + TIMEMORY_FOLD_EXPRESSION(do_enumerator_generate( + opaque_array, idx, std::forward(args)...)); for(auto&& itr : opaque_array) Tp::configure(std::move(itr.first), std::move(itr.second)); } @@ -196,9 +190,10 @@ template void enumerator_configure(Tp& obj, int idx, int_sequence, Args&&... args) { - std::vector opaque_array; - TIMEMORY_FOLD_EXPRESSION( - do_enumerator_generate(opaque_array, idx, std::forward(args)...)); + constexpr int TpV = component::properties::value; + std::vector opaque_array{}; + TIMEMORY_FOLD_EXPRESSION(do_enumerator_generate( + opaque_array, idx, std::forward(args)...)); for(auto&& itr : opaque_array) obj.configure(std::move(itr.first), std::move(itr.second)); } diff --git a/source/timemory/settings/tsettings.hpp b/source/timemory/settings/tsettings.hpp index 1a34109dd..1b0be7ff0 100644 --- a/source/timemory/settings/tsettings.hpp +++ b/source/timemory/settings/tsettings.hpp @@ -51,7 +51,7 @@ namespace tim /// \struct tim::tsettings /// \brief Implements a specific setting template -struct tsettings : public vsettings +struct tsettings final : public vsettings { private: template diff --git a/source/timemory/storage/declaration.hpp b/source/timemory/storage/declaration.hpp index 007ee54e6..3a099e9ed 100644 --- a/source/timemory/storage/declaration.hpp +++ b/source/timemory/storage/declaration.hpp @@ -797,7 +797,7 @@ storage::master_instance() /// and the serialization library are responsible for most of the timemory compilation /// time. 
template -class storage : public impl::storage::value> +class storage final : public impl::storage::value> { public: static constexpr bool uses_value_storage_v = trait::uses_value_storage::value; diff --git a/source/timemory/storage/definition.hpp b/source/timemory/storage/definition.hpp index 3e79b92c9..df3c5bd90 100644 --- a/source/timemory/storage/definition.hpp +++ b/source/timemory/storage/definition.hpp @@ -792,8 +792,11 @@ storage::_data() m_settings->get_debug() && m_settings->get_verbose() > 1, 16); } - if(m_node_ids.empty()) - m_node_ids[0][0] = m_graph_data_instance->current(); + if(m_node_ids.empty() && m_graph_data_instance) + { + m_node_ids.emplace(0, iterator_hash_submap_t{}); + m_node_ids.at(0).emplace(0, m_graph_data_instance->current()); + } } m_initialized = true; @@ -822,10 +825,13 @@ storage::merge() l.lock(); for(auto& itr : m_children) - { - if(itr != this) - itr->data().clear(); - } + singleton_t::remove(itr); + + // for(auto& itr : m_children) + // { + // if(itr != this) + // itr->data().clear(); + // } stack_clear(); } diff --git a/source/timemory/storage/ring_buffer.cpp b/source/timemory/storage/ring_buffer.cpp index dd1fa5534..57925ffdf 100644 --- a/source/timemory/storage/ring_buffer.cpp +++ b/source/timemory/storage/ring_buffer.cpp @@ -39,13 +39,18 @@ # include #endif +#if !defined(TIMEMORY_RING_BUFFER_INLINE) +# define TIMEMORY_RING_BUFFER_INLINE +#endif + namespace tim { namespace base { -// +TIMEMORY_RING_BUFFER_INLINE ring_buffer::~ring_buffer() { destroy(); } -// + +TIMEMORY_RING_BUFFER_INLINE void ring_buffer::init(size_t _size) { @@ -71,7 +76,15 @@ ring_buffer::init(size_t _size) m_read_count = 0; m_write_count = 0; + if(!m_use_mmap_explicit) + m_use_mmap = get_env("TIMEMORY_USE_MMAP", m_use_mmap); + #if defined(TIMEMORY_LINUX) + if(!m_use_mmap) + { + m_ptr = malloc(m_size * sizeof(char)); + return; + } // Set file path depending on whether shared memory is compiled in or not. # ifdef SHM char path[] = "/dev/shm/rb-XXXXXX"; @@ -110,32 +123,52 @@ ring_buffer::init(size_t _size) MAP_FIXED | MAP_SHARED, m_fd, 0) == MAP_FAILED) destroy(); #else - m_ptr = malloc(m_size * sizeof(char)); + m_use_mmap = false; + m_ptr = malloc(m_size * sizeof(char)); (void) m_fd; #endif } +TIMEMORY_RING_BUFFER_INLINE void ring_buffer::destroy() { m_init = false; + if(!m_ptr) + return; #if defined(TIMEMORY_LINUX) - // Truncate file to zero, to avoid writing back memory to file, on munmap. - if(ftruncate(m_fd, 0) < 0) + if(!m_use_mmap) + { + ::free(m_ptr); + } + else { - bool _cond = settings::verbose() > 0 || settings::debug(); - CONDITIONAL_PRINT_HERE( - _cond, "Ring buffer failed to truncate the file descriptor %i\n", m_fd); + // Truncate file to zero, to avoid writing back memory to file, on munmap. + if(ftruncate(m_fd, 0) < 0) + { + bool _cond = settings::verbose() > 0 || settings::debug(); + CONDITIONAL_PRINT_HERE( + _cond, "Ring buffer failed to truncate the file descriptor %i\n", m_fd); + } + // Unmap the mapped virtual memmory. + auto ret = munmap(m_ptr, m_size * 2); + // Close the backing file. + close(m_fd); + if(ret) + perror("munmap"); } - // Unmap the mapped virtual memmory. - auto ret = munmap(m_ptr, m_size * 2); - // Close the backing file. 
- close(m_fd); - if(ret) - perror("munmap"); #else ::free(m_ptr); #endif + m_ptr = nullptr; +} + +TIMEMORY_RING_BUFFER_INLINE +void +ring_buffer::set_use_mmap(bool _v) +{ + m_use_mmap = _v; + m_use_mmap_explicit = true; } } // namespace base diff --git a/source/timemory/storage/ring_buffer.hpp b/source/timemory/storage/ring_buffer.hpp index 9e09dd761..77ec312f4 100644 --- a/source/timemory/storage/ring_buffer.hpp +++ b/source/timemory/storage/ring_buffer.hpp @@ -62,17 +62,21 @@ struct ring_buffer /// Write data to buffer. template - size_t write(Tp* in, std::enable_if_t::value, int> = 0); + std::pair write(Tp* in, + std::enable_if_t::value, int> = 0); template - size_t write(Tp* in, std::enable_if_t::value, int> = 0); + std::pair write(Tp* in, + std::enable_if_t::value, int> = 0); /// Read data from buffer. template - size_t read(Tp* out, std::enable_if_t::value, int> = 0) const; + std::pair read( + Tp* out, std::enable_if_t::value, int> = 0) const; template - size_t read(Tp* out, std::enable_if_t::value, int> = 0) const; + std::pair read( + Tp* out, std::enable_if_t::value, int> = 0) const; /// Returns number of bytes currently held by the buffer. size_t count() const { return m_write_count - m_read_count; } @@ -86,8 +90,15 @@ struct ring_buffer /// Returns if the buffer is full. bool is_full() const { return count() == m_size; } + /// Rewind the read position n bytes size_t rewind(size_t n) const; + /// explicitly configure to use mmap if avail + void set_use_mmap(bool); + + /// query whether using mmap + bool get_use_mmap() const { return m_use_mmap; } + private: /// Returns the current write pointer. void* write_ptr() const @@ -99,20 +110,22 @@ struct ring_buffer void* read_ptr() const { return static_cast(m_ptr) + (m_read_count % m_size); } private: - bool m_init = false; - int m_fd = 0; - void* m_ptr = nullptr; - size_t m_size = 0; - mutable size_t m_read_count = 0; - size_t m_write_count = 0; + bool m_init = false; + bool m_use_mmap = true; + bool m_use_mmap_explicit = false; + int m_fd = 0; + void* m_ptr = nullptr; + size_t m_size = 0; + mutable size_t m_read_count = 0; + size_t m_write_count = 0; }; // template -size_t +std::pair ring_buffer::write(Tp* in, std::enable_if_t::value, int>) { if(in == nullptr) - return 0; + return { 0, nullptr }; auto _length = sizeof(Tp); @@ -121,22 +134,24 @@ ring_buffer::write(Tp* in, std::enable_if_t::value, int>) if(_length > free()) _length = free(); + // pointer in buffer + Tp* out = reinterpret_cast(write_ptr()); + // Copy in. - new(write_ptr()) Tp{ *in }; - // memcpy(write_ptr(), in, _length); + new((void*) out) Tp{ *in }; // Update write count m_write_count += _length; - return _length; + return { _length, out }; } // template -size_t +std::pair ring_buffer::write(Tp* in, std::enable_if_t::value, int>) { if(in == nullptr) - return 0; + return { 0, nullptr }; auto _length = sizeof(Tp); @@ -145,21 +160,24 @@ ring_buffer::write(Tp* in, std::enable_if_t::value, int>) if(_length > free()) _length = free(); + // pointer in buffer + Tp* out = reinterpret_cast(write_ptr()); + // Copy in. 
- memcpy(write_ptr(), in, _length); + memcpy((void*) out, in, _length); // Update write count m_write_count += _length; - return _length; + return { _length, out }; } // template -size_t +std::pair ring_buffer::read(Tp* out, std::enable_if_t::value, int>) const { if(is_empty() || out == nullptr) - return 0; + return { 0, nullptr }; auto _length = sizeof(Tp); @@ -167,21 +185,24 @@ ring_buffer::read(Tp* out, std::enable_if_t::value, int>) cons if(_length > count()) _length = count(); + // pointer in buffer + Tp* in = reinterpret_cast(read_ptr()); + // Copy out for BYTE, nothing magic here. - *out = *(reinterpret_cast(read_ptr())); + *out = *in; // Update read count. m_read_count += _length; - return _length; + return { _length, in }; } // template -size_t +std::pair ring_buffer::read(Tp* out, std::enable_if_t::value, int>) const { if(is_empty() || out == nullptr) - return 0; + return { 0, nullptr }; auto _length = sizeof(Tp); @@ -192,17 +213,21 @@ ring_buffer::read(Tp* out, std::enable_if_t::value, int>) con _length = count(); assert(out != nullptr); + + // pointer in buffer + Tp* in = reinterpret_cast(read_ptr()); + // Copy out for BYTE, nothing magic here. Up* _out = const_cast(out); - memcpy(_out, read_ptr(), _length); + memcpy(_out, in, _length); // Update read count. m_read_count += _length; - return _length; + return { _length, in }; } // -size_t +inline size_t ring_buffer::rewind(size_t n) const { if(n > m_read_count) @@ -248,11 +273,11 @@ struct ring_buffer : private base::ring_buffer /// Write data to buffer. size_t data_size() { return sizeof(Tp); } - /// Write data to buffer. - size_t write(Tp* in) { return base_type::write(in); } + /// Write data to buffer. Return pointer to location of write + Tp* write(Tp* in) { return base_type::write(in).second; } - /// Read data from buffer. - size_t read(Tp* out) const { return base_type::read(out); } + /// Read data from buffer. Return pointer to location of read + Tp* read(Tp* out) const { return base_type::read(out).second; } /// Returns number of bytes currently held by the buffer. 
size_t count() const { return base_type::count() / sizeof(Tp); } @@ -292,5 +317,12 @@ struct ring_buffer : private base::ring_buffer } // namespace tim #if !defined(TIMEMORY_COMMON_SOURCE) && !defined(TIMEMORY_USE_COMMON_EXTERN) +# if !defined(TIMEMORY_RING_BUFFER_INLINE) +# define TIMEMORY_RING_BUFFER_INLINE inline +# endif # include "timemory/storage/ring_buffer.cpp" +#else +# if !defined(TIMEMORY_RING_BUFFER_INLINE) +# define TIMEMORY_RING_BUFFER_INLINE +# endif #endif diff --git a/source/timemory/utility/filepath.hpp b/source/timemory/utility/filepath.hpp index 9b1744809..88a8968bc 100644 --- a/source/timemory/utility/filepath.hpp +++ b/source/timemory/utility/filepath.hpp @@ -31,21 +31,9 @@ #pragma once -#include +#include "timemory/macros/os.hpp" -//--------------------------------------------------------------------------------------// -// base operating system - -#if defined(_WIN32) || defined(_WIN64) -# if !defined(TIMEMORY_WINDOWS) -# define TIMEMORY_WINDOWS -# endif -#elif defined(__APPLE__) || defined(__MACH__) || defined(__linux__) || \ - defined(__linux) || defined(linux) || defined(__gnu_linux__) -# if !defined(TIMEMORY_UNIX) -# define TIMEMORY_UNIX -# endif -#endif +#include //--------------------------------------------------------------------------------------// diff --git a/source/timemory/utility/popen.cpp b/source/timemory/utility/popen.cpp index d79b78459..ea5a87165 100644 --- a/source/timemory/utility/popen.cpp +++ b/source/timemory/utility/popen.cpp @@ -23,6 +23,8 @@ // SOFTWARE. // +#include "timemory/macros/os.hpp" + #if !defined(TIMEMORY_WINDOWS) # include "timemory/utility/popen.hpp" diff --git a/source/timemory/utility/popen.hpp b/source/timemory/utility/popen.hpp index 02b48d0ff..5edbf6206 100644 --- a/source/timemory/utility/popen.hpp +++ b/source/timemory/utility/popen.hpp @@ -25,6 +25,7 @@ #pragma once +#include "timemory/macros/os.hpp" #include "timemory/utility/macros.hpp" #include "timemory/utility/utility.hpp" diff --git a/source/timemory/variadic/macros.hpp b/source/timemory/variadic/macros.hpp index e53ef169d..a0c0367d0 100644 --- a/source/timemory/variadic/macros.hpp +++ b/source/timemory/variadic/macros.hpp @@ -36,6 +36,7 @@ #include "timemory/compat/macros.h" #include "timemory/general/source_location.hpp" +#include "timemory/macros/os.hpp" #include "timemory/mpl/apply.hpp" #include "timemory/utility/macros.hpp" #include "timemory/utility/utility.hpp" diff --git a/source/tools/timemory-mallocp/timemory-mallocp.cpp b/source/tools/timemory-mallocp/timemory-mallocp.cpp index 037e489e3..f3929ae9b 100644 --- a/source/tools/timemory-mallocp/timemory-mallocp.cpp +++ b/source/tools/timemory-mallocp/timemory-mallocp.cpp @@ -73,10 +73,28 @@ extern "C" }; tim::manager::instance()->add_cleanup("timemory-mallocp", _cleanup); - global_id = global_cnt; + global_id = global_cnt + 1; } + + // start the wrappers _handle->start(); - return global_cnt++; + // if the first instance, actually call malloc and free to pre-allocate + // storage and disable if a null pointer was returned + if(global_cnt == 0) + { + size_t _size = 8 * sizeof(char); + char* _buff = static_cast(::malloc(_size)); + if(_buff == nullptr) + { + PRINT_HERE("timemory_mallocp was started but malloc(%i) returned a " + "nullptr. 
Disabling timemory_mallocp", + static_cast(_size)); + _handle->stop(); + return global_cnt; + } + ::free(_buff); + } + return ++global_cnt; } else { diff --git a/timemory/CMakeLists.txt b/timemory/CMakeLists.txt index a660c9214..8b6dcf822 100644 --- a/timemory/CMakeLists.txt +++ b/timemory/CMakeLists.txt @@ -26,7 +26,7 @@ file(RELATIVE_PATH LIB_RELPATH "${_PYLIB}" set(PYTHON_SUBMODULE_FOLDERS analyze api ert mpi mpi_support plotting profiler roofline util bundle component hardware_counters test trace region - libs) + tools libs) file(GLOB PYTHON_SUBMODULE_FILES ${PROJECT_SOURCE_DIR}/${PROJECT_NAME}/*.py) string(REPLACE "${PROJECT_SOURCE_DIR}/${PROJECT_NAME}/" "" @@ -181,6 +181,6 @@ if(TIMEMORY_USE_PYTHON) timemory_python_console_script("timemory-python-trace" "timemory.trace" "main") if(TIMEMORY_BUILD_PYTHON_LINE_PROFILER) - timemory_python_console_script("timemory-python-line-profiler" "timemory.profiler" "main") + timemory_python_console_script("timemory-python-line-profiler" "timemory.line_profiler" "main") endif() endif() diff --git a/timemory/__init__.py.in b/timemory/__init__.py.in index 3284cadea..cef65edc1 100644 --- a/timemory/__init__.py.in +++ b/timemory/__init__.py.in @@ -163,6 +163,7 @@ else: from . import mpi_support as mpi_support from . import mpi as mpi from . import util as util + from . import tools as tools from . import bundle as bundle from . import options as options from . import units as units @@ -180,6 +181,7 @@ else: sys.modules["timemory.mpi_support"] = mpi_support sys.modules["timemory.mpi"] = mpi sys.modules["timemory.util"] = util + sys.modules["timemory.tools"] = tools sys.modules["timemory.bundle"] = bundle sys.modules["timemory.options"] = options sys.modules["timemory.units"] = units diff --git a/timemory/libs/__init__.py b/timemory/libs/__init__.py index b1b018249..9c5ac7843 100644 --- a/timemory/libs/__init__.py +++ b/timemory/libs/__init__.py @@ -78,6 +78,8 @@ stop_ncclp, start_mallocp, stop_mallocp, + start_function_wrappers, + stop_function_wrappers, enable_signal_detection, disable_signal_detection, set_exit_action, @@ -142,6 +144,8 @@ "stop_ncclp", "start_mallocp", "stop_mallocp", + "start_function_wrappers", + "stop_function_wrappers", "enable_signal_detection", "disable_signal_detection", "set_exit_action", diff --git a/timemory/tools/__init__.py b/timemory/tools/__init__.py new file mode 100644 index 000000000..d9a3fe455 --- /dev/null +++ b/timemory/tools/__init__.py @@ -0,0 +1,148 @@ +#!@PYTHON_EXECUTABLE@ +# +# MIT License +# +# Copyright (c) 2018, The Regents of the University of California, +# through Lawrence Berkeley National Laboratory (subject to receipt of any +# required approvals from the U.S. Dept. of Energy). All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import absolute_import + +__author__ = "Jonathan Madsen" +__copyright__ = "Copyright 2020, The Regents of the University of California" +__credits__ = ["Jonathan Madsen"] +__license__ = "MIT" +__version__ = "@PROJECT_VERSION@" +__maintainer__ = "Jonathan Madsen" +__email__ = "jrmadsen@lbl.gov" +__status__ = "Development" + +from functools import wraps + +try: + from ..libs.libpytimemory import ( + start_function_wrappers as _start_function_wrappers, + ) + from ..libs.libpytimemory import ( + stop_function_wrappers as _stop_function_wrappers, + ) + + available = True +except ImportError as e: + import os + + _debug = os.environ.get("TIMEMORY_DEBUG", "").lower() + if _debug in ["y", "yes", "1", "on", "true", "t"]: + import sys + + sys.stderr.write(f"{e}\n") + + def _start_function_wrappers(*_args, **_kwargs): + return None + + def _stop_function_wrappers(*_args, **_kwargs): + return None + + available = False + +__all__ = ["function_wrappers", "available"] + + +class function_wrappers(object): + """A decorator or context-manager for dynamic function wrappers (Linux-only). + These dynamic function wrappers either re-write the global offset table so that + timemory components can extract their arguments and return values (e.g. wrap around + malloc) or they enable the built-in callback API provided by the library itself + (e.g. OMPT for OpenMP). + + Valid inputs are currently any of the strings following the tool name: + - timemory-mpip: mpi, mpip + - timemory-ompt: ompt, openmp + - timemory-ncclp: nccl, ncclp + - timemory-mallocp: malloc, mallocp, memory + + Example: + + .. highlight:: python + .. code-block:: python + + @function_wrappers("mpi", "nccl", ompt=False) + def foo(): + pass + + def bar(): + with function_wrappers(mpi=True, memory=True): + pass + """ + + def __init__(self, *_args, **_kwargs): + def _start_functor(): + return _start_function_wrappers(*_args, **_kwargs) + + def _stop_functor(_idx): + _ret = _stop_function_wrappers(_idx) + return None if sum(_ret) == 0 else _ret + + self._start = _start_functor + self._stop = _stop_functor + self._idx = None + + def start(self): + self._idx = self._start() + + def stop(self): + self._idx = self._stop(self._idx) + + def is_running(self): + return self._idx is not None + + def __call__(self, func): + """ + Decorator + """ + + @wraps(func) + def function_wrapper(*args, **kwargs): + self._idx = self._start() + _ret = func(*args, **kwargs) + self._idx = self._stop(self._idx) + return _ret + + return function_wrapper + + def __enter__(self, *args, **kwargs): + """ + Context manager + """ + self._idx = self._start() + + def __exit__(self, exc_type, exc_value, exc_traceback): + self._idx = self._stop(self._idx) + if ( + exc_type is not None + and exc_value is not None + and exc_traceback is not None + ): + import traceback + + traceback.print_exception( + exc_type, exc_value, exc_traceback, limit=5 + )
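A minimal usage sketch for the new timemory.tools.function_wrappers handle added above, assuming the timemory Python bindings are installed and were built with the corresponding wrapper libraries (e.g. mpip/mallocp). The script and workload names below are hypothetical; they only illustrate the explicit start()/stop() interface in addition to the decorator and context-manager forms shown in the class docstring.

# minimal usage sketch (hypothetical script) for timemory.tools.function_wrappers;
# assumes the timemory Python bindings were built with the MPI and malloc
# wrappers referenced in the docstring above
from timemory.tools import function_wrappers

def workload():
    # placeholder for the code to be profiled
    data = [bytearray(1024) for _ in range(64)]
    return len(data)

def main():
    # component names follow the docstring conventions ("malloc", mpi=True, ...)
    wrappers = function_wrappers("malloc", mpi=True)
    wrappers.start()           # install the wrappers and store the returned index
    result = workload()
    if wrappers.is_running():  # index is None when the bindings are unavailable
        wrappers.stop()        # uninstall the wrappers using the stored index
    return result

if __name__ == "__main__":
    main()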