diff --git a/CMakeLists.txt b/CMakeLists.txt index f060a5c933..137a952507 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,8 @@ set( with-models OFF CACHE STRING "The models to include as a semicolon-separate set( tics_per_ms "1000.0" CACHE STRING "Specify elementary unit of time [default=1000 tics per ms]." ) set( tics_per_step "100" CACHE STRING "Specify resolution [default=100 tics per step]." ) set( with-detailed-timers OFF CACHE STRING "Build with detailed internal time measurements [default=OFF]. Detailed timers can affect the performance." ) +set( with-mpi-sync-timer OFF CACHE STRING "Build with mpi synchronization barrier and timer [default=OFF]. Can affect the performance." ) +set( with-threaded-timers ON CACHE STRING "Build with one internal timer per thread [default=ON]. Multi-threaded timers can affect the performance." ) set( target-bits-split "standard" CACHE STRING "Split of the 64-bit target neuron identifier type [default='standard']. 'standard' is recommended for most users. If running on more than 262144 MPI processes or more than 512 threads, change to 'hpc'." ) # generic build configuration @@ -143,6 +145,8 @@ nest_process_with_gsl() nest_process_with_openmp() nest_process_with_mpi() nest_process_with_detailed_timers() +nest_process_with_threaded_timers() +nest_process_with_mpi_sync_timer() nest_process_with_libneurosim() nest_process_with_music() nest_process_with_sionlib() diff --git a/cmake/ConfigureSummary.cmake b/cmake/ConfigureSummary.cmake index ea9610aa9a..686edb64bb 100644 --- a/cmake/ConfigureSummary.cmake +++ b/cmake/ConfigureSummary.cmake @@ -150,6 +150,20 @@ function( NEST_PRINT_CONFIG_SUMMARY ) message( "Detailed timers : No" ) endif () + message( "" ) + if ( THREADED_TIMERS ) + message( "Threaded timers : Yes" ) + else () + message( "Threaded timers : No" ) + endif () + + message( "" ) + if ( THREADED_TIMERS ) + message( "MPI sync timer : Yes" ) + else () + message( "MPI sync timer : No" ) + endif () + message( "" ) if ( HAVE_MUSIC ) message( "Use MUSIC : Yes (MUSIC ${MUSIC_VERSION})" ) diff --git a/cmake/ProcessOptions.cmake b/cmake/ProcessOptions.cmake index 09498146d1..fad5dfdc1f 100644 --- a/cmake/ProcessOptions.cmake +++ b/cmake/ProcessOptions.cmake @@ -462,6 +462,20 @@ function( NEST_PROCESS_WITH_DETAILED_TIMERS ) endif () endfunction() +function( NEST_PROCESS_WITH_THREADED_TIMERS ) + set( THREADED_TIMERS OFF PARENT_SCOPE ) + if ( ${with-threaded-timers} STREQUAL "ON" ) + set( THREADED_TIMERS ON PARENT_SCOPE ) + endif () +endfunction() + +function( NEST_PROCESS_WITH_MPI_SYNC_TIMER ) + set( MPI_SYNC_TIMER OFF PARENT_SCOPE ) + if ( ${with-mpi-sync-timer} STREQUAL "ON" ) + set( MPI_SYNC_TIMER ON PARENT_SCOPE ) + endif () +endfunction() + function( NEST_PROCESS_WITH_LIBNEUROSIM ) # Find libneurosim set( HAVE_LIBNEUROSIM OFF PARENT_SCOPE ) diff --git a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst index 83460ab19c..f6dceba360 100644 --- a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst +++ b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst @@ -556,7 +556,7 @@ For example, the ``stopwatch.h`` file could look like: } inline nest::Stopwatch::timestamp_t - nest::Stopwatch::elapsed_timestamp() const + nest::Stopwatch::elapsed_us() const { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -622,7 +622,7 @@ For example, the ``stopwatch.h`` file could look like: } inline nest::Stopwatch::timestamp_t - nest::Stopwatch::get_timestamp() + nest::Stopwatch::get_current_time() { // works with: // * hambach (Linux 2.6.32 x86_64) @@ -637,12 +637,12 @@ For example, the ``stopwatch.h`` file could look like: } /* namespace timer */ #endif /* STOPWATCH_H */ -And the corresponding ``stopwatch.cpp``: +And the corresponding ``stopwatch_impl.h``: .. code:: cpp /* - * stopwatch.cpp + * stopwatch_impl.h * * This file is part of NEST. * diff --git a/doc/htmldoc/installation/cmake_options.rst b/doc/htmldoc/installation/cmake_options.rst index a1283d537c..323d3c6127 100644 --- a/doc/htmldoc/installation/cmake_options.rst +++ b/doc/htmldoc/installation/cmake_options.rst @@ -102,7 +102,7 @@ For more details, see the :ref:`Python binding ` section be .. _performance_cmake: Maximize performance, reduce energy consumption -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following options help to optimize NEST for maximal performance and thus reduced energy consumption. @@ -126,7 +126,7 @@ The following options help to optimize NEST for maximal performance and thus red in place. * Using ``-march=native`` requires that you build NEST on the same CPU architecture as you will use to run it. * For the technically minded: Even just using ``-O3`` removes some ``assert()`` statements from NEST since we - have wrapped some of them in functions, which get eliminated due to interprocedural optimization. + have wrapped some of them in functions, which get eliminated due to interprocedural optimization. @@ -197,8 +197,14 @@ NEST properties +-----------------------------------------------+----------------------------------------------------------------+ | ``-Dtics_per_step=[number]`` | Specify resolution [default=100 tics per step]. | +-----------------------------------------------+----------------------------------------------------------------+ +| ``-Dwith-threaded-timers=[OFF|ON]`` | Build with one internal timer per thread [default=ON]. | +| | Multi-threaded timers can affect the performance. | ++-----------------------------------------------+----------------------------------------------------------------+ | ``-Dwith-detailed-timers=[OFF|ON]`` | Build with detailed internal time measurements [default=OFF]. | | | Detailed timers can affect the performance. | ++----------------------------------------------------------------------------------------------------------------+ +| ``-Dwith-mpi-sync-timer=[OFF|ON]`` | Build with mpi synchronization barrier and timer [default=OFF].| +| | Can affect the performance. | +-----------------------------------------------+----------------------------------------------------------------+ | ``-Dtarget-bits-split=['standard'|'hpc']`` | Split of the 64-bit target neuron identifier type | | | [default='standard']. 'standard' is recommended for most users.| diff --git a/doc/htmldoc/nest_behavior/built-in_timers.rst b/doc/htmldoc/nest_behavior/built-in_timers.rst index 32c88d339f..77191fd518 100644 --- a/doc/htmldoc/nest_behavior/built-in_timers.rst +++ b/doc/htmldoc/nest_behavior/built-in_timers.rst @@ -6,11 +6,9 @@ Built-in timers Basic timers ------------ -Basic built-in timers keep track of the time NEST spent for network -construction and actual simulation (propagation of the network -state). These timers are active in all simulations with NEST, and the -measured times can be checked by querying the corresponding kernel -attributes. For example: +Basic built-in timers keep track of the time NEST spent for network construction and actual simulation (propagation of +the network state). These timers are active in all simulations with NEST, and the measured times can be checked by +querying the corresponding kernel attributes. For example: :: @@ -22,7 +20,7 @@ The following basic time measurements are available: |Name |Explanation | +=============================+==================================+ |``time_construction_create`` |Cumulative time NEST spent | -| |creating neurons and devices | +| |creating neurons and devices | +-----------------------------+----------------------------------+ |``time_construction_connect``|Cumulative time NEST spent | | |creating connections | @@ -33,19 +31,14 @@ The following basic time measurements are available: .. note :: - While preparing the actual simulation after network construction, - NEST needs to build the pre-synaptic part of the connection - infrastructure, which requires MPI communication (`Jordan et - al. 2018 `__). This - happens only for the first call to ``Simulate()`` unless - connectivity changed in the meantime, and it may cause significant - overhead by adding to ``time_simulate``. Therefore, the cumulative - time NEST spent for building the pre-synaptic connection - infrastructure is also tracked by a basic timer and available as - kernel attribute ``time_communicate_prepare``. + While preparing the actual simulation after network construction, NEST needs to build the pre-synaptic part of the + connection infrastructure, which requires MPI communication (`Jordan et al. 2018 + `__). This happens only for the first call to ``Simulate()`` unless + connectivity changed in the meantime, and it may cause significant overhead by adding to ``time_simulate``. + Therefore, the cumulative time NEST spent for building the pre-synaptic connection infrastructure is also tracked by + a basic timer and available as kernel attribute ``time_communicate_prepare``. -In the context of NEST performance monitoring, other useful kernel -attributes are: +In the context of NEST performance monitoring, other useful kernel attributes are: +-----------------------+----------------------------------+ |Name |Explanation | @@ -54,39 +47,34 @@ attributes are: +-----------------------+----------------------------------+ |``local_spike_counter``|Number of spikes emitted by the | | |neurons represented on this MPI | -| |rank during the last | +| |rank during the last | | |``Simulate()`` | +-----------------------+----------------------------------+ .. note :: - ``nest.ResetKernel()`` resets all time measurements as well as - ``biological_time`` and ``local_spike_counter``. + ``nest.ResetKernel()`` resets all time measurements as well as ``biological_time`` and ``local_spike_counter``. Detailed timers --------------- -Detailed built-in timers can be activated (and again deactivated) -prior to compilation through the CMake flag -``-Dwith-detailed-timers=ON``. They provide further insights into the -time NEST spends in different phases of the simulation cycle, but they -can impact the runtime. Therefore, detailed timers are by default -inactive. +Detailed built-in timers can be activated (and again deactivated) prior to compilation through the CMake flag +``-Dwith-detailed-timers=ON``. They provide further insights into the time NEST spends in different phases of the +simulation cycle, but they can impact the runtime. Therefore, detailed timers are by default inactive. -If detailed timers are active, the following time measurements are -available as kernel attributes: +If detailed timers are active, the following time measurements are available as kernel attributes: +--------------------------------+----------------------------------+----------------------------------+ |Name |Explanation |Part of | +================================+==================================+==================================+ |``time_gather_target_data`` |Cumulative time for communicating |``time_communicate_prepare`` | | |connection information from | | -| |postsynaptic to presynaptic side | | +| |postsynaptic to presynaptic side | | +--------------------------------+----------------------------------+----------------------------------+ |``time_communicate_target_data``|Cumulative time for core MPI |``time_gather_target_data`` | | |communication when gathering | | -| |target data | | +| |target data | | +--------------------------------+----------------------------------+----------------------------------+ |``time_update`` |Time for neuron update |``time_simulate`` | +--------------------------------+----------------------------------+----------------------------------+ @@ -107,3 +95,36 @@ available as kernel attributes: | |buffers of the corresponding | | | |postsynaptic neurons | | +--------------------------------+----------------------------------+----------------------------------+ +|``time_omp_synchronization_construction`` |Synchronization time of threads during network construction. |``time_construction_create``, ``time_construction_connect``, ``time_communicate_prepare`` | ++--------------------------------+----------------------------------+----------------------------------+ +|``time_omp_synchronization_simulation`` |Synchronization time of threads during simulation. |``time_simulate`` | ++--------------------------------+----------------------------------+----------------------------------+ + +MPI synchronization timer +------------------------- +In order to measure synchronization time between multiple MPI processes, an additional timer can be activated on demand +via the ``-Dwith-mpi-sync-timer=ON`` CMake flag. This timer measures the time between the end of a process' update phase +(i.e., neuron state propagation) and start of collective communication of spikes between all MPI processes. This timer +adds an additional MPI barrier right before the start of communication, which might affect performance. + ++-----------------------------+---------------------------------------+ +|Name |Explanation | ++=============================+=======================================+ +|``time_mpi_synchronization`` |Time spent waiting for other processes.| ++-----------------------------+---------------------------------------+ + +Multi-threaded timers +--------------------- +In previous NEST versions, only the master thread measured timers. Since NEST 3.9, all timers that are recorded in a +parallel (multi-threaded) environment are recorded by each thread individually. + +The legacy timer behavior can be restored via the ``-Dwith-threaded-timers=OFF`` CMake flag. + +Wall-time vs. CPU-time +------------------------- +All timers in NEST measure the actual wall-time spent between starting and stopping the timer. In order to only measure +time spent on calculations, there is an additional variant for each of the timers above, suffixed with ``_cpu``. They +can be accessed in the exact same way. For example: +:: + + nest.time_simulate_cpu diff --git a/libnestutil/CMakeLists.txt b/libnestutil/CMakeLists.txt index 3700edea51..0000aaac77 100644 --- a/libnestutil/CMakeLists.txt +++ b/libnestutil/CMakeLists.txt @@ -31,7 +31,6 @@ set( nestutil_sources numerics.h numerics.cpp regula_falsi.h sort.h - stopwatch.h stopwatch.cpp string_utils.h vector_util.h ) diff --git a/libnestutil/config.h.in b/libnestutil/config.h.in index 08b4251560..70998df6f3 100644 --- a/libnestutil/config.h.in +++ b/libnestutil/config.h.in @@ -182,6 +182,12 @@ /* Whether to enable detailed NEST internal timers */ #cmakedefine TIMER_DETAILED 1 +/* Whether to use one NEST internal timer per thread */ +#cmakedefine THREADED_TIMERS 1 + +/* Whether to use the mpi synchronization timer (including an additional barrier) */ +#cmakedefine MPI_SYNC_TIMER 1 + /* Whether to do full logging */ #cmakedefine ENABLE_FULL_LOGGING 1 diff --git a/libnestutil/stopwatch.cpp b/libnestutil/stopwatch.cpp deleted file mode 100644 index ec7a2b7676..0000000000 --- a/libnestutil/stopwatch.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * stopwatch.cpp - * - * This file is part of NEST. - * - * Copyright (C) 2004 The NEST Initiative - * - * NEST is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or - * (at your option) any later version. - * - * NEST is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with NEST. If not, see . - * - */ - -#include "stopwatch.h" - -namespace nest -{ -std::ostream& -operator<<( std::ostream& os, const Stopwatch& stopwatch ) -{ - stopwatch.print( "", Stopwatch::SECONDS, os ); - return os; -} -} diff --git a/libnestutil/stopwatch.h b/libnestutil/stopwatch.h deleted file mode 100644 index d30474faa2..0000000000 --- a/libnestutil/stopwatch.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * stopwatch.h - * - * This file is part of NEST. - * - * Copyright (C) 2004 The NEST Initiative - * - * NEST is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or - * (at your option) any later version. - * - * NEST is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with NEST. If not, see . - * - */ - -#ifndef STOPWATCH_H -#define STOPWATCH_H - -// C includes: -#include - -// C++ includes: -#include -#include - -namespace nest -{ - -/*********************************************************************** - * Stopwatch * - * Accumulates time between start and stop, and provides * - * the elapsed time with different time units. * - * * - * Partly inspired by com.google.common.base.Stopwatch.java * - * Not thread-safe: - Do not share stopwatches among threads. * - * - Let each thread have its own stopwatch. * - * * - * Usage example: * - * Stopwatch x; * - * x.start(); * - * // ... do computations for 15.34 sec * - * x.stop(); // only pauses stopwatch * - * x.print("Time needed "); // > Time needed 15.34 sec. * - * x.start(); // resumes stopwatch * - * // ... next computations for 11.22 sec * - * x.stop(); * - * x.print("Time needed "); // > Time needed 26,56 sec. * - * x.reset(); // reset to default values * - * x.start(); // starts the stopwatch from 0 * - * // ... computation 5.7 sec * - * x.print("Time "); // > Time 5.7 sec. * - * // ^ intermediate timing without stopping the stopwatch * - * // ... more computations 1.7643 min * - * x.stop(); * - * x.print("Time needed ", Stopwatch::MINUTES, std::cerr); * - * // > Time needed 1,8593 min. (on cerr) * - * // other units and output streams possible * - ***********************************************************************/ -class Stopwatch -{ -public: - typedef size_t timestamp_t; - typedef size_t timeunit_t; - - enum - { - MICROSEC = static_cast< timeunit_t >( 1 ), - MILLISEC = MICROSEC * static_cast< timeunit_t >( 1000 ), - SECONDS = MILLISEC * static_cast< timeunit_t >( 1000 ), - MINUTES = SECONDS * static_cast< timeunit_t >( 60 ), - HOURS = MINUTES * static_cast< timeunit_t >( 60 ), - DAYS = HOURS * static_cast< timeunit_t >( 24 ) - }; - - static bool correct_timeunit( timeunit_t t ); - - /** - * Creates a stopwatch that is not running. - */ - Stopwatch() - { - reset(); - } - - /** - * Starts or resumes the stopwatch, if it is not running already. - */ - void start(); - - /** - * Stops the stopwatch, if it is not stopped already. - */ - void stop(); - - /** - * Returns, whether the stopwatch is running. - */ - bool isRunning() const; - - /** - * Returns the time elapsed between the start and stop of the - * stopwatch. If it is running, it returns the time from start - * until now. If the stopwatch is run previously, the previous - * runtime is added. If you want only the last measurement, you - * have to reset the timer, before stating the measurement. - * Does not change the running state. - */ - double elapsed( timeunit_t timeunit = SECONDS ) const; - - /** - * Returns the time elapsed between the start and stop of the - * stopwatch. If it is running, it returns the time from start - * until now. If the stopwatch is run previously, the previous - * runtime is added. If you want only the last measurement, you - * have to reset the timer, before stating the measurement. - * Does not change the running state. - * In contrast to Stopwatch::elapsed(), only the timestamp is returned, - * that is the number if microseconds as an integer. - */ - timestamp_t elapsed_timestamp() const; - - /** - * Resets the stopwatch. - */ - void reset(); - - /** - * This method prints out the currently elapsed time. - */ - void print( const char* msg = "", timeunit_t timeunit = SECONDS, std::ostream& os = std::cout ) const; - - /** - * Convenient method for writing time in seconds - * to some ostream. - */ - friend std::ostream& operator<<( std::ostream& os, const Stopwatch& stopwatch ); - -private: -#ifndef DISABLE_TIMING - timestamp_t _beg, _end; - size_t _prev_elapsed; - bool _running; -#endif - - /** - * Returns current time in microseconds since EPOCH. - */ - static timestamp_t get_timestamp(); -}; - -inline bool -Stopwatch::correct_timeunit( timeunit_t t ) -{ - return t == MICROSEC or t == MILLISEC or t == SECONDS or t == MINUTES or t == HOURS or t == DAYS; -} - -inline void -nest::Stopwatch::start() -{ -#ifndef DISABLE_TIMING - if ( not isRunning() ) - { - _prev_elapsed += _end - _beg; // store prev. time, if we resume - _end = _beg = get_timestamp(); // invariant: _end >= _beg - _running = true; // we start running - } -#endif -} - -inline void -nest::Stopwatch::stop() -{ -#ifndef DISABLE_TIMING - if ( isRunning() ) - { - _end = get_timestamp(); // invariant: _end >= _beg - _running = false; // we stopped running - } -#endif -} - -inline bool -nest::Stopwatch::isRunning() const -{ -#ifndef DISABLE_TIMING - return _running; -#else - return false; -#endif -} - -inline double -nest::Stopwatch::elapsed( timeunit_t timeunit ) const -{ -#ifndef DISABLE_TIMING - assert( correct_timeunit( timeunit ) ); - return 1.0 * elapsed_timestamp() / timeunit; -#else - return 0.0; -#endif -} - -inline nest::Stopwatch::timestamp_t -nest::Stopwatch::elapsed_timestamp() const -{ -#ifndef DISABLE_TIMING - if ( isRunning() ) - { - // get intermediate elapsed time; do not change _end, to be const - return get_timestamp() - _beg + _prev_elapsed; - } - else - { - // stopped before, get time of current measurement + last measurements - return _end - _beg + _prev_elapsed; - } -#else - return static_cast< timestamp_t >( 0 ); -#endif -} - -inline void -nest::Stopwatch::reset() -{ -#ifndef DISABLE_TIMING - _beg = 0; // invariant: _end >= _beg - _end = 0; - _prev_elapsed = 0; // erase all prev. measurements - _running = false; // of course not running. -#endif -} - -inline void -nest::Stopwatch::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const -{ -#ifndef DISABLE_TIMING - assert( correct_timeunit( timeunit ) ); - double e = elapsed( timeunit ); - os << msg << e; - switch ( timeunit ) - { - case MICROSEC: - os << " microsec."; - break; - case MILLISEC: - os << " millisec."; - break; - case SECONDS: - os << " sec."; - break; - case MINUTES: - os << " min."; - break; - case HOURS: - os << " h."; - break; - case DAYS: - os << " days."; - break; - } -#ifdef DEBUG - os << " (running: " << ( _running ? "true" : "false" ) << ", begin: " << _beg << ", end: " << _end - << ", diff: " << ( _end - _beg ) << ", prev: " << _prev_elapsed << ")"; -#endif - os << std::endl; -#endif -} - -inline nest::Stopwatch::timestamp_t -nest::Stopwatch::get_timestamp() -{ - // works with: - // * hambach (Linux 2.6.32 x86_64) - // * JuQueen (BG/Q) - // * MacOS 10.9 - struct timeval now; - gettimeofday( &now, static_cast< struct timezone* >( nullptr ) ); - return ( nest::Stopwatch::timestamp_t ) now.tv_usec - + ( nest::Stopwatch::timestamp_t ) now.tv_sec * nest::Stopwatch::SECONDS; -} - -} /* namespace timer */ -#endif /* STOPWATCH_H */ diff --git a/nestkernel/CMakeLists.txt b/nestkernel/CMakeLists.txt index 685fb741aa..264f0deb5b 100644 --- a/nestkernel/CMakeLists.txt +++ b/nestkernel/CMakeLists.txt @@ -116,6 +116,7 @@ set ( nestkernel_sources stimulation_backend.h buffer_resize_log.h buffer_resize_log.cpp nest_extension_interface.h + stopwatch.h stopwatch_impl.h ) diff --git a/nestkernel/connection_manager.cpp b/nestkernel/connection_manager.cpp index 1d47802ff9..648d6c01ea 100644 --- a/nestkernel/connection_manager.cpp +++ b/nestkernel/connection_manager.cpp @@ -55,6 +55,7 @@ #include "nest_names.h" #include "node.h" #include "sonata_connector.h" +#include "stopwatch_impl.h" #include "target_table_devices_impl.h" #include "vp_manager_impl.h" @@ -226,7 +227,7 @@ nest::ConnectionManager::get_status( DictionaryDatum& dict ) def< bool >( dict, names::keep_source_table, keep_source_table_ ); def< bool >( dict, names::use_compressed_spikes, use_compressed_spikes_ ); - def< double >( dict, names::time_construction_connect, sw_construction_connect.elapsed() ); + sw_construction_connect.output_timer( dict, names::time_construction_connect, names::time_construction_connect_cpu ); ArrayDatum connection_rules; for ( auto const& element : *connruledict_ ) @@ -1800,7 +1801,9 @@ nest::ConnectionManager::collect_compressed_spike_data( const size_t tid ) } // of omp single; implicit barrier source_table_.collect_compressible_sources( tid ); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { source_table_.fill_compressed_spike_data( compressed_spike_data_ ); diff --git a/nestkernel/connection_manager.h b/nestkernel/connection_manager.h index e6c3888dff..b1fbf65f1b 100644 --- a/nestkernel/connection_manager.h +++ b/nestkernel/connection_manager.h @@ -461,7 +461,7 @@ class ConnectionManager : public ManagerInterface // public stop watch for benchmarking purposes // start and stop in high-level connect functions in nestmodule.cpp and nest.cpp - Stopwatch sw_construction_connect; + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_construction_connect; const std::vector< SpikeData >& get_compressed_spike_data( const synindex syn_id, const size_t idx ); diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index dbb98504d5..ede6812272 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -34,14 +34,13 @@ #include "mpi_manager_impl.h" #include "send_buffer_position.h" #include "source.h" +#include "stopwatch_impl.h" #include "vp_manager.h" #include "vp_manager_impl.h" // Includes from sli: #include "dictutils.h" -#include "compose.hpp" - namespace nest { @@ -191,11 +190,11 @@ EventDeliveryManager::get_status( DictionaryDatum& dict ) ( *dict )[ names::spike_buffer_resize_log ] = log_events; send_recv_buffer_resize_log_.to_dict( log_events ); -#ifdef TIMER_DETAILED - def< double >( dict, names::time_collocate_spike_data, sw_collocate_spike_data_.elapsed() ); - def< double >( dict, names::time_communicate_spike_data, sw_communicate_spike_data_.elapsed() ); - def< double >( dict, names::time_communicate_target_data, sw_communicate_target_data_.elapsed() ); -#endif + sw_collocate_spike_data_.output_timer( dict, names::time_collocate_spike_data, names::time_collocate_spike_data_cpu ); + sw_communicate_spike_data_.output_timer( + dict, names::time_communicate_spike_data, names::time_communicate_spike_data_cpu ); + sw_communicate_target_data_.output_timer( + dict, names::time_communicate_target_data, names::time_communicate_target_data_cpu ); } void @@ -317,18 +316,14 @@ EventDeliveryManager::reset_counters() void EventDeliveryManager::reset_timers_for_preparation() { -#ifdef TIMER_DETAILED sw_communicate_target_data_.reset(); -#endif } void EventDeliveryManager::reset_timers_for_dynamics() { -#ifdef TIMER_DETAILED sw_collocate_spike_data_.reset(); sw_communicate_spike_data_.reset(); -#endif } void @@ -400,11 +395,7 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer // Need to get new positions in case buffer size has changed SendBufferPosition send_buffer_position; -#ifdef TIMER_DETAILED - { - sw_collocate_spike_data_.start(); - } -#endif + sw_collocate_spike_data_.start(); // Set marker at end of each chunk to DEFAULT reset_complete_marker_spike_data_( send_buffer_position, send_buffer ); @@ -426,11 +417,12 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer // as all chunk-end entries, have marker DEFAULT. set_end_marker_( send_buffer_position, send_buffer, local_max_spikes_per_rank ); -#ifdef TIMER_DETAILED - { - sw_collocate_spike_data_.stop(); - sw_communicate_spike_data_.start(); - } + sw_collocate_spike_data_.stop(); + sw_communicate_spike_data_.start(); +#ifdef MPI_SYNC_TIMER + kernel().get_mpi_synchronization_stopwatch().start(); + kernel().mpi_manager.synchronize(); + kernel().get_mpi_synchronization_stopwatch().stop(); #endif // Given that we templatize by plain vs offgrid, this if should not be necessary, but ... @@ -443,11 +435,7 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer kernel().mpi_manager.communicate_spike_data_Alltoall( send_buffer, recv_buffer ); } -#ifdef TIMER_DETAILED - { - sw_communicate_spike_data_.stop(); - } -#endif + sw_communicate_spike_data_.stop(); global_max_spikes_per_rank_ = get_global_max_spikes_per_rank_( send_buffer_position, recv_buffer ); @@ -811,7 +799,9 @@ EventDeliveryManager::gather_target_data( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; (no barrier) + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); kernel().connection_manager.restore_source_table_entry_point( tid ); @@ -826,18 +816,16 @@ EventDeliveryManager::gather_target_data( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } kernel().connection_manager.save_source_table_entry_point( tid ); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); kernel().connection_manager.clean_source_table( tid ); #pragma omp master { -#ifdef TIMER_DETAILED sw_communicate_target_data_.start(); -#endif kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); -#ifdef TIMER_DETAILED sw_communicate_target_data_.stop(); -#endif } // of omp master (no barriers!) #pragma omp barrier @@ -883,7 +871,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; no barrier + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); TargetSendBufferPosition send_buffer_position( assigned_ranks, kernel().mpi_manager.get_send_recv_count_target_data_per_rank() ); @@ -898,17 +888,15 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp master { -#ifdef TIMER_DETAILED sw_communicate_target_data_.start(); -#endif kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); -#ifdef TIMER_DETAILED sw_communicate_target_data_.stop(); -#endif } // of omp master (no barrier) #pragma omp barrier @@ -925,7 +913,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) { buffer_size_target_data_has_changed_ = kernel().mpi_manager.increase_buffer_size_target_data(); } // of omp master (no barrier) + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); } } // of while diff --git a/nestkernel/event_delivery_manager.h b/nestkernel/event_delivery_manager.h index 4f49645077..59e3681646 100644 --- a/nestkernel/event_delivery_manager.h +++ b/nestkernel/event_delivery_manager.h @@ -466,13 +466,11 @@ class EventDeliveryManager : public ManagerInterface PerThreadBoolIndicator gather_completed_checker_; -#ifdef TIMER_DETAILED // private stop watches for benchmarking purposes // (intended for internal core developers, not for use in the public API) - Stopwatch sw_collocate_spike_data_; - Stopwatch sw_communicate_spike_data_; - Stopwatch sw_communicate_target_data_; -#endif + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_collocate_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_target_data_; }; inline void diff --git a/nestkernel/kernel_manager.cpp b/nestkernel/kernel_manager.cpp index 2e407b249a..cb37d4175b 100644 --- a/nestkernel/kernel_manager.cpp +++ b/nestkernel/kernel_manager.cpp @@ -22,6 +22,8 @@ #include "kernel_manager.h" +#include "stopwatch_impl.h" + nest::KernelManager* nest::KernelManager::kernel_manager_instance_ = nullptr; void @@ -91,6 +93,10 @@ nest::KernelManager::initialize() manager->initialize( /* adjust_number_of_threads_or_rng_only */ false ); } + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); + sw_mpi_synchronization_.reset(); + ++fingerprint_; initialized_ = true; FULL_LOGGING_ONLY( dump_.open( @@ -104,6 +110,9 @@ nest::KernelManager::prepare() { manager->prepare(); } + + sw_omp_synchronization_simulation_.reset(); + sw_mpi_synchronization_.reset(); } void @@ -162,6 +171,16 @@ nest::KernelManager::change_number_of_threads( size_t new_num_threads ) // is in place again, we can tell modules to re-register the components // they provide. module_manager.reinitialize_dynamic_modules(); + + // Prepare timers and set the number of threads for multi-threaded timers + kernel().simulation_manager.reset_timers_for_preparation(); + kernel().simulation_manager.reset_timers_for_dynamics(); + kernel().event_delivery_manager.reset_timers_for_preparation(); + kernel().event_delivery_manager.reset_timers_for_dynamics(); + + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); + sw_mpi_synchronization_.reset(); } void @@ -184,6 +203,12 @@ nest::KernelManager::get_status( DictionaryDatum& dict ) { manager->get_status( dict ); } + + sw_omp_synchronization_construction_.output_timer( + dict, names::time_omp_synchronization_construction, names::time_omp_synchronization_construction_cpu ); + sw_omp_synchronization_simulation_.output_timer( + dict, names::time_omp_synchronization_simulation, names::time_omp_synchronization_simulation_cpu ); + sw_mpi_synchronization_.output_timer( dict, names::time_mpi_synchronization, names::time_mpi_synchronization_cpu ); } void diff --git a/nestkernel/kernel_manager.h b/nestkernel/kernel_manager.h index 216981b2b7..a055ba0e8b 100644 --- a/nestkernel/kernel_manager.h +++ b/nestkernel/kernel_manager.h @@ -287,12 +287,37 @@ class KernelManager MUSICManager music_manager; NodeManager node_manager; /**@}*/ + + //! Get the stopwatch to measure the time each thread is idle during network construction. + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded >& + get_omp_synchronization_construction_stopwatch() + { + return sw_omp_synchronization_construction_; + } + + //! Get the stopwatch to measure the time each thread is idle during simulation. + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded >& + get_omp_synchronization_simulation_stopwatch() + { + return sw_omp_synchronization_simulation_; + } + + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly >& + get_mpi_synchronization_stopwatch() + { + return sw_mpi_synchronization_; + } + private: //! All managers, order determines initialization and finalization order (latter backwards) std::vector< ManagerInterface* > managers; bool initialized_; //!< true if the kernel is initialized std::ofstream dump_; //!< for FULL_LOGGING output + + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_construction_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_simulation_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_mpi_synchronization_; }; KernelManager& kernel(); diff --git a/nestkernel/mpi_manager.cpp b/nestkernel/mpi_manager.cpp index b7a348322e..02bde93a52 100644 --- a/nestkernel/mpi_manager.cpp +++ b/nestkernel/mpi_manager.cpp @@ -26,7 +26,7 @@ #include // Includes from libnestutil: -#include "stopwatch.h" +#include "stopwatch_impl.h" // Includes from nestkernel: #include "kernel_manager.h" @@ -834,16 +834,16 @@ nest::MPIManager::time_communicate( int num_bytes, int samples ) std::vector< unsigned int > test_send_buffer( packet_length ); std::vector< unsigned int > test_recv_buffer( packet_length * get_num_processes() ); // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Allgather( &test_send_buffer[ 0 ], packet_length, MPI_UNSIGNED, &test_recv_buffer[ 0 ], packet_length, MPI_UNSIGNED, comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes using Allgatherv @@ -870,16 +870,16 @@ nest::MPIManager::time_communicatev( int num_bytes, int samples ) } // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { communicate_Allgatherv( test_send_buffer, test_recv_buffer, displacements, n_nodes ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes @@ -898,8 +898,8 @@ nest::MPIManager::time_communicate_offgrid( int num_bytes, int samples ) std::vector< OffGridSpike > test_send_buffer( packet_length ); std::vector< OffGridSpike > test_recv_buffer( packet_length * get_num_processes() ); // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Allgather( &test_send_buffer[ 0 ], @@ -911,8 +911,8 @@ nest::MPIManager::time_communicate_offgrid( int num_bytes, int samples ) comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes using Alltoall @@ -932,16 +932,16 @@ nest::MPIManager::time_communicate_alltoall( int num_bytes, int samples ) std::vector< unsigned int > test_send_buffer( total_packet_length ); std::vector< unsigned int > test_recv_buffer( total_packet_length ); // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Alltoall( &test_send_buffer[ 0 ], packet_length, MPI_UNSIGNED, &test_recv_buffer[ 0 ], packet_length, MPI_UNSIGNED, comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes using Alltoallv @@ -969,8 +969,8 @@ nest::MPIManager::time_communicate_alltoallv( int num_bytes, int samples ) } // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Alltoallv( &test_send_buffer[ 0 ], @@ -984,8 +984,8 @@ nest::MPIManager::time_communicate_alltoallv( int num_bytes, int samples ) comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } #else /* #ifdef HAVE_MPI */ diff --git a/nestkernel/nest_names.cpp b/nestkernel/nest_names.cpp index c897a9e6cf..f3992392f1 100644 --- a/nestkernel/nest_names.cpp +++ b/nestkernel/nest_names.cpp @@ -596,19 +596,38 @@ const Name threshold_voltage( "threshold_voltage" ); const Name tics_per_ms( "tics_per_ms" ); const Name tics_per_step( "tics_per_step" ); const Name time_collocate_spike_data( "time_collocate_spike_data" ); +const Name time_collocate_spike_data_cpu( "time_collocate_spike_data_cpu" ); const Name time_communicate_prepare( "time_communicate_prepare" ); +const Name time_communicate_prepare_cpu( "time_communicate_prepare_cpu" ); const Name time_communicate_spike_data( "time_communicate_spike_data" ); +const Name time_communicate_spike_data_cpu( "time_communicate_spike_data_cpu" ); const Name time_communicate_target_data( "time_communicate_target_data" ); +const Name time_communicate_target_data_cpu( "time_communicate_target_data_cpu" ); const Name time_construction_connect( "time_construction_connect" ); +const Name time_construction_connect_cpu( "time_construction_connect_cpu" ); const Name time_construction_create( "time_construction_create" ); +const Name time_construction_create_cpu( "time_construction_create_cpu" ); const Name time_deliver_secondary_data( "time_deliver_secondary_data" ); +const Name time_deliver_secondary_data_cpu( "time_deliver_secondary_data_cpu" ); const Name time_deliver_spike_data( "time_deliver_spike_data" ); +const Name time_deliver_spike_data_cpu( "time_deliver_spike_data_cpu" ); const Name time_gather_secondary_data( "time_gather_secondary_data" ); +const Name time_gather_secondary_data_cpu( "time_gather_secondary_data_cpu" ); const Name time_gather_spike_data( "time_gather_spike_data" ); +const Name time_gather_spike_data_cpu( "time_gather_spike_data_cpu" ); const Name time_gather_target_data( "time_gather_target_data" ); +const Name time_gather_target_data_cpu( "time_gather_target_data_cpu" ); +const Name time_omp_synchronization_construction( "time_omp_synchronization_construction" ); +const Name time_omp_synchronization_construction_cpu( "time_omp_synchronization_construction_cpu" ); +const Name time_omp_synchronization_simulation( "time_omp_synchronization_simulation" ); +const Name time_omp_synchronization_simulation_cpu( "time_omp_synchronization_simulation_cpu" ); +const Name time_mpi_synchronization( "time_mpi_synchronization" ); +const Name time_mpi_synchronization_cpu( "time_mpi_synchronization_cpu" ); const Name time_in_steps( "time_in_steps" ); const Name time_simulate( "time_simulate" ); +const Name time_simulate_cpu( "time_simulate_cpu" ); const Name time_update( "time_update" ); +const Name time_update_cpu( "time_update_cpu" ); const Name times( "times" ); const Name to_do( "to_do" ); const Name total_num_virtual_procs( "total_num_virtual_procs" ); diff --git a/nestkernel/nest_names.h b/nestkernel/nest_names.h index b6d964e04c..759b3b32b4 100644 --- a/nestkernel/nest_names.h +++ b/nestkernel/nest_names.h @@ -624,19 +624,38 @@ extern const Name threshold_voltage; extern const Name tics_per_ms; extern const Name tics_per_step; extern const Name time_collocate_spike_data; +extern const Name time_collocate_spike_data_cpu; extern const Name time_communicate_prepare; +extern const Name time_communicate_prepare_cpu; extern const Name time_communicate_spike_data; +extern const Name time_communicate_spike_data_cpu; extern const Name time_communicate_target_data; +extern const Name time_communicate_target_data_cpu; extern const Name time_construction_connect; +extern const Name time_construction_connect_cpu; extern const Name time_construction_create; +extern const Name time_construction_create_cpu; extern const Name time_deliver_secondary_data; +extern const Name time_deliver_secondary_data_cpu; extern const Name time_deliver_spike_data; +extern const Name time_deliver_spike_data_cpu; extern const Name time_gather_secondary_data; +extern const Name time_gather_secondary_data_cpu; extern const Name time_gather_spike_data; +extern const Name time_gather_spike_data_cpu; extern const Name time_gather_target_data; +extern const Name time_gather_target_data_cpu; +extern const Name time_omp_synchronization_construction; +extern const Name time_omp_synchronization_construction_cpu; +extern const Name time_omp_synchronization_simulation; +extern const Name time_omp_synchronization_simulation_cpu; +extern const Name time_mpi_synchronization; +extern const Name time_mpi_synchronization_cpu; extern const Name time_in_steps; extern const Name time_simulate; +extern const Name time_simulate_cpu; extern const Name time_update; +extern const Name time_update_cpu; extern const Name times; extern const Name to_do; extern const Name total_num_virtual_procs; diff --git a/nestkernel/node_manager.cpp b/nestkernel/node_manager.cpp index 5e984ca7fa..632b750687 100644 --- a/nestkernel/node_manager.cpp +++ b/nestkernel/node_manager.cpp @@ -31,8 +31,6 @@ #include "logging.h" // Includes from nestkernel: -#include "event_delivery_manager.h" -#include "genericmodel.h" #include "kernel_manager.h" #include "model.h" #include "model_manager_impl.h" @@ -788,7 +786,7 @@ void NodeManager::get_status( DictionaryDatum& d ) { def< long >( d, names::network_size, size() ); - def< double >( d, names::time_construction_create, sw_construction_create_.elapsed() ); + sw_construction_create_.output_timer( d, names::time_construction_create, names::time_construction_create_cpu ); } void diff --git a/nestkernel/node_manager.h b/nestkernel/node_manager.h index 86044911b3..f5dd76093b 100644 --- a/nestkernel/node_manager.h +++ b/nestkernel/node_manager.h @@ -356,7 +356,7 @@ class NodeManager : public ManagerInterface std::vector< std::shared_ptr< WrappedThreadException > > exceptions_raised_; // private stop watch for benchmarking purposes - Stopwatch sw_construction_create_; + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_construction_create_; }; inline size_t diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index 267f753298..4032a7320f 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -24,6 +24,7 @@ // Includes from nestkernel #include "kernel_manager.h" +#include "stopwatch_impl.h" namespace nest { @@ -64,6 +65,7 @@ PerThreadBoolIndicator::initialize( const size_t num_threads, const bool status bool PerThreadBoolIndicator::all_false() const { + kernel().get_omp_synchronization_construction_stopwatch().start(); // We need two barriers here to ensure that no thread can continue and change the result // before all threads have determined the result. #pragma omp barrier @@ -71,33 +73,43 @@ PerThreadBoolIndicator::all_false() const // before all threads have determined the result. bool ret = ( are_true_ == 0 ); #pragma omp barrier + + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::all_true() const { + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ == size_ ); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::any_false() const { + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ < size_ ); #pragma omp barrier + + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::any_true() const { + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ > 0 ); #pragma omp barrier + + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } diff --git a/nestkernel/simulation_manager.cpp b/nestkernel/simulation_manager.cpp index 9e0da05602..acd980d8da 100644 --- a/nestkernel/simulation_manager.cpp +++ b/nestkernel/simulation_manager.cpp @@ -37,6 +37,7 @@ #include "connection_manager_impl.h" #include "event_delivery_manager.h" #include "kernel_manager.h" +#include "stopwatch_impl.h" // Includes from sli: #include "dictutils.h" @@ -117,22 +118,18 @@ void nest::SimulationManager::reset_timers_for_preparation() { sw_communicate_prepare_.reset(); -#ifdef TIMER_DETAILED sw_gather_target_data_.reset(); -#endif } void nest::SimulationManager::reset_timers_for_dynamics() { sw_simulate_.reset(); -#ifdef TIMER_DETAILED sw_gather_spike_data_.reset(); sw_gather_secondary_data_.reset(); sw_update_.reset(); sw_deliver_spike_data_.reset(); sw_deliver_secondary_data_.reset(); -#endif } void @@ -477,16 +474,15 @@ nest::SimulationManager::get_status( DictionaryDatum& d ) def< double >( d, names::min_update_time, min_update_time_ ); def< double >( d, names::max_update_time, max_update_time_ ); - def< double >( d, names::time_simulate, sw_simulate_.elapsed() ); - def< double >( d, names::time_communicate_prepare, sw_communicate_prepare_.elapsed() ); -#ifdef TIMER_DETAILED - def< double >( d, names::time_gather_spike_data, sw_gather_spike_data_.elapsed() ); - def< double >( d, names::time_gather_secondary_data, sw_gather_secondary_data_.elapsed() ); - def< double >( d, names::time_update, sw_update_.elapsed() ); - def< double >( d, names::time_gather_target_data, sw_gather_target_data_.elapsed() ); - def< double >( d, names::time_deliver_spike_data, sw_deliver_spike_data_.elapsed() ); - def< double >( d, names::time_deliver_secondary_data, sw_deliver_secondary_data_.elapsed() ); -#endif + sw_simulate_.output_timer( d, names::time_simulate, names::time_simulate_cpu ); + sw_communicate_prepare_.output_timer( d, names::time_communicate_prepare, names::time_communicate_prepare_cpu ); + sw_gather_spike_data_.output_timer( d, names::time_gather_spike_data, names::time_gather_spike_data_cpu ); + sw_gather_secondary_data_.output_timer( d, names::time_gather_secondary_data, names::time_gather_secondary_data_cpu ); + sw_update_.output_timer( d, names::time_update, names::time_update_cpu ); + sw_gather_target_data_.output_timer( d, names::time_gather_target_data, names::time_gather_target_data_cpu ); + sw_deliver_spike_data_.output_timer( d, names::time_deliver_spike_data, names::time_deliver_spike_data_cpu ); + sw_deliver_secondary_data_.output_timer( + d, names::time_deliver_secondary_data, names::time_deliver_secondary_data_cpu ); def< double >( d, names::eprop_update_interval, eprop_update_interval_ ); def< double >( d, names::eprop_learning_window, eprop_learning_window_ ); def< bool >( d, names::eprop_reset_neurons_on_update, eprop_reset_neurons_on_update_ ); @@ -738,17 +734,21 @@ nest::SimulationManager::call_update_() void nest::SimulationManager::update_connection_infrastructure( const size_t tid ) { + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - if ( tid == 0 ) - { - sw_communicate_prepare_.start(); - } + kernel().get_omp_synchronization_construction_stopwatch().stop(); + + sw_communicate_prepare_.start(); - kernel().connection_manager.restructure_connection_tables( tid ); kernel().connection_manager.sort_connections( tid ); + sw_gather_target_data_.start(); + kernel().connection_manager.restructure_connection_tables( tid ); kernel().connection_manager.collect_compressed_spike_data( tid ); + sw_gather_target_data_.stop(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier // wait for all threads to finish sorting + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { @@ -763,9 +763,16 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) if ( kernel().connection_manager.secondary_connections_exist() ) { + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); + kernel().connection_manager.compute_compressed_secondary_recv_buffer_positions( tid ); + + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); + #pragma omp single { kernel().mpi_manager.communicate_recv_counts_secondary_events(); @@ -773,12 +780,7 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) } } -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_gather_target_data_.start(); - } -#endif + sw_gather_target_data_.start(); // communicate connection information from postsynaptic to // presynaptic side @@ -796,32 +798,23 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) kernel().event_delivery_manager.gather_target_data( tid ); } -#ifdef TIMER_DETAILED -#pragma omp barrier - if ( tid == 0 ) - { - sw_gather_target_data_.stop(); - } -#endif + sw_gather_target_data_.stop(); if ( kernel().connection_manager.secondary_connections_exist() ) { kernel().connection_manager.compress_secondary_send_buffer_pos( tid ); } + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { kernel().connection_manager.clear_compressed_spike_data_map(); kernel().node_manager.set_have_nodes_changed( false ); kernel().connection_manager.unset_connections_have_changed(); } - -#pragma omp barrier - if ( tid == 0 ) - { - sw_communicate_prepare_.stop(); - } + sw_communicate_prepare_.stop(); } bool @@ -871,48 +864,30 @@ nest::SimulationManager::update_() // from the time step preceding the arrival of the spike triggering the weight update. if ( kernel().connection_manager.secondary_connections_exist() ) { -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_secondary_data_.start(); - } -#endif + sw_deliver_secondary_data_.start(); kernel().event_delivery_manager.deliver_secondary_events( tid, false ); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_secondary_data_.stop(); - } -#endif + sw_deliver_secondary_data_.stop(); } if ( kernel().connection_manager.has_primary_connections() ) { -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_spike_data_.start(); - } -#endif + sw_deliver_spike_data_.start(); // Deliver spikes from receive buffer to ring buffers. kernel().event_delivery_manager.deliver_events( tid ); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_spike_data_.stop(); - } -#endif + sw_deliver_spike_data_.stop(); } #ifdef HAVE_MUSIC -// advance the time of music by one step (min_delay * h) must -// be done after deliver_events_() since it calls -// music_event_out_proxy::handle(), which hands the spikes over to -// MUSIC *before* MUSIC time is advanced + // advance the time of music by one step (min_delay * h) must + // be done after deliver_events_() since it calls + // music_event_out_proxy::handle(), which hands the spikes over to + // MUSIC *before* MUSIC time is advanced -// wait until all threads are done -> synchronize + // wait until all threads are done -> synchronize + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block #pragma omp master @@ -971,9 +946,13 @@ nest::SimulationManager::update_() // add done value of thread p to done vector #pragma omp critical - done.push_back( done_p ); -// parallel section ends, wait until all threads are done -> synchronize + { + done.push_back( done_p ); + } + // parallel section ends, wait until all threads are done -> synchronize + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by a single thread // the other threads wait at the end of the block @@ -1033,7 +1012,9 @@ nest::SimulationManager::update_() Node* node = i->get_node(); node->update_synaptic_elements( Time( Time::step( clock_.get_steps() + from_step_ ) ).get_ms() ); } + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_simulation_stopwatch().stop(); #pragma omp single { kernel().sp_manager.update_structural_plasticity(); @@ -1055,14 +1036,7 @@ nest::SimulationManager::update_() } // of structural plasticity - -#ifdef TIMER_DETAILED -#pragma omp barrier - if ( tid == 0 ) - { - sw_update_.start(); - } -#endif + sw_update_.start(); const SparseNodeArray& thread_local_nodes = kernel().node_manager.get_local_nodes( tid ); for ( SparseNodeArray::const_iterator n = thread_local_nodes.begin(); n != thread_local_nodes.end(); ++n ) @@ -1074,18 +1048,15 @@ nest::SimulationManager::update_() } } -// parallel section ends, wait until all threads are done -> synchronize -#pragma omp barrier + sw_update_.stop(); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_update_.stop(); - } -#endif + // parallel section ends, wait until all threads are done -> synchronize + kernel().get_omp_synchronization_simulation_stopwatch().start(); +#pragma omp barrier + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by the master thread only -// the other threads are enforced to wait at the end of the block + // the other threads are enforced to wait at the end of the block #pragma omp master { // gather and deliver only at end of slice, i.e., end of min_delay step @@ -1093,24 +1064,15 @@ nest::SimulationManager::update_() { if ( kernel().connection_manager.has_primary_connections() ) { -#ifdef TIMER_DETAILED sw_gather_spike_data_.start(); -#endif - kernel().event_delivery_manager.gather_spike_data(); -#ifdef TIMER_DETAILED sw_gather_spike_data_.stop(); -#endif } if ( kernel().connection_manager.secondary_connections_exist() ) { -#ifdef TIMER_DETAILED sw_gather_secondary_data_.start(); -#endif kernel().event_delivery_manager.gather_secondary_events( true ); -#ifdef TIMER_DETAILED sw_gather_secondary_data_.stop(); -#endif } } @@ -1136,8 +1098,10 @@ nest::SimulationManager::update_() // if block to avoid omp barrier if SIONLIB is not used #ifdef HAVE_SIONLIB kernel().io_manager.post_step_hook(); -// enforce synchronization after post-step activities of the recording backends + // enforce synchronization after post-step activities of the recording backends + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_simulation_stopwatch().stop(); #endif const double end_current_update = sw_simulate_.elapsed(); diff --git a/nestkernel/simulation_manager.h b/nestkernel/simulation_manager.h index 83a37cac31..f446aa9e86 100644 --- a/nestkernel/simulation_manager.h +++ b/nestkernel/simulation_manager.h @@ -228,17 +228,15 @@ class SimulationManager : public ManagerInterface double max_update_time_; //!< longest update time seen so far (seconds) // private stop watches for benchmarking purposes - Stopwatch sw_simulate_; - Stopwatch sw_communicate_prepare_; -#ifdef TIMER_DETAILED + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_simulate_; + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::Threaded > sw_communicate_prepare_; // intended for internal core developers, not for use in the public API - Stopwatch sw_gather_spike_data_; - Stopwatch sw_gather_secondary_data_; - Stopwatch sw_update_; - Stopwatch sw_gather_target_data_; - Stopwatch sw_deliver_spike_data_; - Stopwatch sw_deliver_secondary_data_; -#endif + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_gather_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_gather_secondary_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_update_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_target_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_deliver_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_deliver_secondary_data_; double eprop_update_interval_; double eprop_learning_window_; diff --git a/nestkernel/source_table.cpp b/nestkernel/source_table.cpp index b5e7c49c73..d9fcd7af1c 100644 --- a/nestkernel/source_table.cpp +++ b/nestkernel/source_table.cpp @@ -29,6 +29,7 @@ #include "kernel_manager.h" #include "mpi_manager_impl.h" #include "source_table.h" +#include "stopwatch_impl.h" #include "vp_manager_impl.h" nest::SourceTable::SourceTable() @@ -229,7 +230,9 @@ nest::SourceTable::compute_buffer_pos_for_unique_secondary_sources( const size_t } } } + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { diff --git a/nestkernel/stimulation_backend_mpi.cpp b/nestkernel/stimulation_backend_mpi.cpp index 189207f08f..8f908d24ce 100644 --- a/nestkernel/stimulation_backend_mpi.cpp +++ b/nestkernel/stimulation_backend_mpi.cpp @@ -159,7 +159,7 @@ nest::StimulationBackendMPI::prepare() } // Add the id of device of the other thread in the vector_id_device and update the count of all device - for ( int id_thread = 0; id_thread < kernel().vp_manager.get_num_threads(); id_thread++ ) + for ( size_t id_thread = 0; id_thread < kernel().vp_manager.get_num_threads(); id_thread++ ) { // don't do it again for the master thread if ( id_thread != thread_id_master ) diff --git a/nestkernel/stopwatch.h b/nestkernel/stopwatch.h new file mode 100644 index 0000000000..786a7d6ceb --- /dev/null +++ b/nestkernel/stopwatch.h @@ -0,0 +1,505 @@ +/* + * stopwatch.h + * + * This file is part of NEST. + * + * Copyright (C) 2004 The NEST Initiative + * + * NEST is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * NEST is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NEST. If not, see . + * + */ + +#ifndef STOPWATCH_H +#define STOPWATCH_H + +// C includes: +#include + +// C++ includes: +#include "arraydatum.h" +#include "dictdatum.h" +#include "dictutils.h" +#include +#include +#include +#include +#include + +namespace nest +{ + +// TODO JV: Set this variable via cmake instead +#ifdef TIMER_DETAILED +constexpr bool use_detailed_timers = true; +#else +constexpr bool use_detailed_timers = false; +#endif +#ifdef THREADED_TIMERS +constexpr bool use_threaded_timers = true; +#else +constexpr bool use_threaded_timers = false; +#endif + +/******************************************************************************** + * Stopwatch * + * Accumulates time between start and stop, and provides the elapsed time * + * with different time units. Either runs multi-threaded or only on master. * + * * + * Usage example: * + * Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > x; * + * x.start(); * + * // ... do computations for 15.34 sec * + * x.stop(); // only pauses stopwatch * + * x.print("Time needed "); // > Time needed 15.34 sec. * + * x.start(); // resumes stopwatch * + * // ... next computations for 11.22 sec * + * x.stop(); * + * x.print("Time needed "); // > Time needed 26,56 sec. * + * x.reset(); // reset to default values * + * x.start(); // starts the stopwatch from 0 * + * // ... computation 5.7 sec * + * x.print("Time "); // > Time 5.7 sec. * + * // ^ intermediate timing without stopping the stopwatch * + * // ... more computations 1.7643 min * + * x.stop(); * + * x.print("Time needed ", StopwatchBase< clock_type >::MINUTES, std::cerr); * + * // > Time needed 1,8593 min. (on cerr) * + * // other units and output streams possible * + ********************************************************************************/ +namespace timers +{ +enum timeunit_t : size_t +{ + NANOSEC = 1, + MICROSEC = NANOSEC * 1000, + MILLISEC = MICROSEC * 1000, + SECONDS = MILLISEC * 1000, + MINUTES = SECONDS * 60, + HOURS = MINUTES * 60, + DAYS = HOURS * 24 +}; + +template < clockid_t clock_type > +class StopwatchBase +{ +public: + typedef size_t timestamp_t; + + /** + * Creates a stopwatch that is not running. + */ + StopwatchBase() + { + reset(); + } + + /** + * Starts or resumes the stopwatch, if it is not running already. + */ + void start(); + + /** + * Stops the stopwatch, if it is not stopped already. + */ + void stop(); + + /** + * Returns, whether the stopwatch is running. + */ + bool isRunning() const; + + /** + * Returns the time elapsed between the start and stop of the stopwatch in the given unit. If it is running, it + * returns the time from start until now. If the stopwatch is run previously, the previous runtime is added. If you + * want only the last measurement, you have to reset the timer, before stating the measurement. + * Does not change the running state. + */ + double elapsed( timeunit_t timeunit = SECONDS ) const; + + /** + * Resets the stopwatch. + */ + void reset(); + + /** + * This method prints out the currently elapsed time. + */ + void print( const char* msg = "", timeunit_t timeunit = SECONDS, std::ostream& os = std::cout ) const; + + /** + * Convenient method for writing time in seconds + * to some ostream. + */ + // friend std::ostream& operator<<( std::ostream& os, const StopwatchBase< clock_type >& stopwatch ); + +private: +#ifndef DISABLE_TIMING + timestamp_t _beg, _end; + size_t _prev_elapsed; + bool _running; +#endif + + /** + * Returns current time in microseconds since EPOCH. + */ + static size_t get_current_time(); +}; + +template < clockid_t clock_type > +inline void +StopwatchBase< clock_type >::start() +{ +#ifndef DISABLE_TIMING + if ( not isRunning() ) + { + _prev_elapsed += _end - _beg; // store prev. time, if we resume + _end = _beg = get_current_time(); // invariant: _end >= _beg + _running = true; // we start running + } +#endif +} + +template < clockid_t clock_type > +inline void +StopwatchBase< clock_type >::stop() +{ +#ifndef DISABLE_TIMING + if ( isRunning() ) + { + _end = get_current_time(); // invariant: _end >= _beg + _running = false; // we stopped running + } +#endif +} + +template < clockid_t clock_type > +inline bool +StopwatchBase< clock_type >::isRunning() const +{ +#ifndef DISABLE_TIMING + return _running; +#else + return false; +#endif +} + +template < clockid_t clock_type > +inline double +StopwatchBase< clock_type >::elapsed( timeunit_t timeunit ) const +{ +#ifndef DISABLE_TIMING + size_t time_elapsed; + if ( isRunning() ) + { + // get intermediate elapsed time; do not change _end, to be const + time_elapsed = get_current_time() - _beg + _prev_elapsed; + } + else + { + // stopped before, get time of current measurement + last measurements + time_elapsed = _end - _beg + _prev_elapsed; + } + return static_cast< double >( time_elapsed ) / timeunit; +#else + return 0.; +#endif +} + +template < clockid_t clock_type > +inline void +StopwatchBase< clock_type >::reset() +{ +#ifndef DISABLE_TIMING + _beg = 0; // invariant: _end >= _beg + _end = 0; + _prev_elapsed = 0; // erase all prev. measurements + _running = false; // of course not running. +#endif +} + +template < clockid_t clock_type > +inline void +StopwatchBase< clock_type >::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const +{ +#ifndef DISABLE_TIMING + double e = elapsed( timeunit ); + os << msg << e; + switch ( timeunit ) + { + case MICROSEC: + os << " microsec."; + break; + case MILLISEC: + os << " millisec."; + break; + case SECONDS: + os << " sec."; + break; + case MINUTES: + os << " min."; + break; + case HOURS: + os << " h."; + break; + case DAYS: + os << " days."; + break; + } +#ifdef DEBUG + os << " (running: " << ( _running ? "true" : "false" ) << ", begin: " << _beg << ", end: " << _end + << ", diff: " << ( _end - _beg ) << ", prev: " << _prev_elapsed << ")"; +#endif + os << std::endl; +#endif +} + +template < clockid_t clock_type > +inline size_t +StopwatchBase< clock_type >::get_current_time() +{ + // We use a monotonic timer to make sure the stopwatch is not influenced by time jumps (e.g. summer/winter time). + timespec now; + clock_gettime( clock_type, &now ); + return now.tv_nsec + now.tv_sec * timeunit_t::SECONDS; +} + +template < clockid_t clock_type > +inline std::ostream& +operator<<( std::ostream& os, const StopwatchBase< clock_type >& stopwatch ) +{ + stopwatch.print( "", timeunit_t::SECONDS, os ); + return os; +} + +} + +enum StopwatchVerbosity +{ + Normal, // +class Stopwatch +{ +public: + void + start() + { +#pragma omp master + { + walltime_timer_.start(); + cputime_timer_.start(); + } + } + + void + stop() + { +#pragma omp master + { + walltime_timer_.stop(); + cputime_timer_.stop(); + } + } + + bool + isRunning() const + { + bool isRunning = false; +#pragma omp master + { + isRunning = walltime_timer_.isRunning(); + }; + return isRunning; + } + + double + elapsed( timers::timeunit_t timeunit = timers::timeunit_t::SECONDS ) const + { + double elapsed = 0.; +#pragma omp master + { + elapsed = walltime_timer_.elapsed( timeunit ); + }; + return elapsed; + } + + void + reset() + { +#pragma omp master + { + walltime_timer_.reset(); + cputime_timer_.reset(); + } + } + + void + print( const char* msg = "", + timers::timeunit_t timeunit = timers::timeunit_t::SECONDS, + std::ostream& os = std::cout ) const + { +#pragma omp master + walltime_timer_.print( msg, timeunit, os ); + } + + void + output_timer( DictionaryDatum& d, const Name& walltime_name, const Name& cputime_name ) + { + def< double >( d, walltime_name, walltime_timer_.elapsed() ); + def< double >( d, cputime_name, cputime_timer_.elapsed() ); + } + +private: + timers::StopwatchBase< CLOCK_MONOTONIC > walltime_timer_; + timers::StopwatchBase< CLOCK_THREAD_CPUTIME_ID > cputime_timer_; +}; + +/** If the user deactivated detailed timers, Stopwatch instance with the detailed flag will become an empty Stopwatch, + * which will be safely ignored by the compiler, as if the instance was not declared (e.g. as a member). + */ +template <> +class Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly, std::enable_if< not use_detailed_timers > > +{ +public: + void + start() + { + } + void + stop() + { + } + bool + isRunning() const + { + return false; + } + double + elapsed( timers::timeunit_t = timers::timeunit_t::SECONDS ) const + { + return 0; + } + void + reset() + { + } + void + print( const char* = "", timers::timeunit_t = timers::timeunit_t::SECONDS, std::ostream& = std::cout ) const + { + } + void + output_timer( DictionaryDatum&, const Name&, const Name& ) + { + } +}; + +/** Only provide these template specializations if threaded timers are activated and always fall back to the base + * template specialization if not. Deactivate threaded detailed timers if all detailed timers are deactivated. + */ +template < StopwatchVerbosity detailed_timer > +class Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Detailed and not use_detailed_timers ) > > +{ +public: + void + start() + { + } + void + stop() + { + } + bool + isRunning() const + { + return false; + } + double + elapsed( timers::timeunit_t = timers::timeunit_t::SECONDS ) const + { + return 0; + } + void + reset() + { + } + void + print( const char* = "", timers::timeunit_t = timers::timeunit_t::SECONDS, std::ostream& = std::cout ) const + { + } + void + output_timer( DictionaryDatum&, const Name&, const Name& ) + { + } +}; +template < StopwatchVerbosity detailed_timer > +class Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > > +{ +public: + void start(); + + void stop(); + + bool isRunning() const; + + double elapsed( timers::timeunit_t timeunit = timers::timeunit_t::SECONDS ) const; + + void reset(); + + void print( const char* msg = "", + timers::timeunit_t timeunit = timers::timeunit_t::SECONDS, + std::ostream& os = std::cout ) const; + + void + output_timer( DictionaryDatum& d, const Name& walltime_name, const Name& cputime_name ) + { + std::vector< double > wall_times( walltime_timers_.size() ); + std::transform( walltime_timers_.begin(), + walltime_timers_.end(), + wall_times.begin(), + []( const timers::StopwatchBase< CLOCK_MONOTONIC >& timer ) { return timer.elapsed(); } ); + def< ArrayDatum >( d, walltime_name, ArrayDatum( wall_times ) ); + std::vector< double > cpu_times( cputime_timers_.size() ); + std::transform( cputime_timers_.begin(), + cputime_timers_.end(), + cpu_times.begin(), + []( const timers::StopwatchBase< CLOCK_THREAD_CPUTIME_ID >& timer ) { return timer.elapsed(); } ); + def< ArrayDatum >( d, cputime_name, ArrayDatum( cpu_times ) ); + } + +private: + std::vector< timers::StopwatchBase< CLOCK_MONOTONIC > > walltime_timers_; + std::vector< timers::StopwatchBase< CLOCK_THREAD_CPUTIME_ID > > cputime_timers_; +}; + +} /* namespace timer */ +#endif /* STOPWATCH_H */ diff --git a/nestkernel/stopwatch_impl.h b/nestkernel/stopwatch_impl.h new file mode 100644 index 0000000000..b467b598e8 --- /dev/null +++ b/nestkernel/stopwatch_impl.h @@ -0,0 +1,112 @@ +/* + * stopwatch_impl.h + * + * This file is part of NEST. + * + * Copyright (C) 2004 The NEST Initiative + * + * NEST is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * NEST is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NEST. If not, see . + * + */ + +#include "kernel_manager.h" +#include "stopwatch.h" + +namespace nest +{ + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::start() +{ + kernel().vp_manager.assert_thread_parallel(); + + walltime_timers_[ kernel().vp_manager.get_thread_id() ].start(); + cputime_timers_[ kernel().vp_manager.get_thread_id() ].start(); +} + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::stop() +{ + kernel().vp_manager.assert_thread_parallel(); + + walltime_timers_[ kernel().vp_manager.get_thread_id() ].stop(); + cputime_timers_[ kernel().vp_manager.get_thread_id() ].stop(); +} + +template < StopwatchVerbosity detailed_timer > +bool +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::isRunning() const +{ + kernel().vp_manager.assert_thread_parallel(); + + return walltime_timers_[ kernel().vp_manager.get_thread_id() ].isRunning(); +} + +template < StopwatchVerbosity detailed_timer > +double +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::elapsed( timers::timeunit_t + timeunit ) const +{ + kernel().vp_manager.assert_thread_parallel(); + + return walltime_timers_[ kernel().vp_manager.get_thread_id() ].elapsed( timeunit ); +} + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::print( const char* msg, + timers::timeunit_t timeunit, + std::ostream& os ) const +{ + kernel().vp_manager.assert_thread_parallel(); + + walltime_timers_[ kernel().vp_manager.get_thread_id() ].print( msg, timeunit, os ); +} + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::reset() +{ + kernel().vp_manager.assert_single_threaded(); + + const size_t num_threads = kernel().vp_manager.get_num_threads(); + walltime_timers_.resize( num_threads ); + cputime_timers_.resize( num_threads ); + for ( size_t i = 0; i < num_threads; ++i ) + { + walltime_timers_[ i ].reset(); + cputime_timers_[ i ].reset(); + } +} +}