From c22f28eb801cfea865723cfa9da83ceda8cb06d4 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Mon, 9 Sep 2024 10:38:34 +0200 Subject: [PATCH 01/11] Added option to measure timers per thread and added timers to measure thread synchronization time --- CMakeLists.txt | 2 + cmake/ConfigureSummary.cmake | 7 + cmake/ProcessOptions.cmake | 7 + doc/htmldoc/installation/cmake_options.rst | 7 +- doc/htmldoc/nest_behavior/built-in_timers.rst | 2 +- libnestutil/CMakeLists.txt | 1 - libnestutil/config.h.in | 3 + nestkernel/CMakeLists.txt | 1 + nestkernel/connection_manager.cpp | 2 + nestkernel/connection_manager.h | 2 +- nestkernel/event_delivery_manager.cpp | 19 ++ nestkernel/event_delivery_manager.h | 6 +- nestkernel/kernel_manager.cpp | 6 + nestkernel/mpi_manager.cpp | 10 +- nestkernel/nest_names.cpp | 1 + nestkernel/nest_names.h | 1 + nestkernel/node_manager.cpp | 2 - nestkernel/node_manager.h | 2 +- nestkernel/per_thread_bool_indicator.cpp | 11 ++ nestkernel/simulation_manager.cpp | 168 ++++++++---------- nestkernel/simulation_manager.h | 11 +- nestkernel/source_table.cpp | 2 + nestkernel/stimulation_backend_mpi.cpp | 2 +- {libnestutil => nestkernel}/stopwatch.cpp | 16 +- {libnestutil => nestkernel}/stopwatch.h | 106 ++++++++--- 25 files changed, 259 insertions(+), 138 deletions(-) rename {libnestutil => nestkernel}/stopwatch.cpp (69%) rename {libnestutil => nestkernel}/stopwatch.h (74%) diff --git a/CMakeLists.txt b/CMakeLists.txt index f060a5c933..542f233df2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,7 @@ set( with-models OFF CACHE STRING "The models to include as a semicolon-separate set( tics_per_ms "1000.0" CACHE STRING "Specify elementary unit of time [default=1000 tics per ms]." ) set( tics_per_step "100" CACHE STRING "Specify resolution [default=100 tics per step]." ) set( with-detailed-timers OFF CACHE STRING "Build with detailed internal time measurements [default=OFF]. Detailed timers can affect the performance." ) +set( with-threaded-timers ON CACHE STRING "Build with one internal timer per thread [default=ON]. Multi-threaded timers can affect the performance." ) set( target-bits-split "standard" CACHE STRING "Split of the 64-bit target neuron identifier type [default='standard']. 'standard' is recommended for most users. If running on more than 262144 MPI processes or more than 512 threads, change to 'hpc'." ) # generic build configuration @@ -143,6 +144,7 @@ nest_process_with_gsl() nest_process_with_openmp() nest_process_with_mpi() nest_process_with_detailed_timers() +nest_process_with_threaded_timers() nest_process_with_libneurosim() nest_process_with_music() nest_process_with_sionlib() diff --git a/cmake/ConfigureSummary.cmake b/cmake/ConfigureSummary.cmake index ea9610aa9a..c0b7e24e8b 100644 --- a/cmake/ConfigureSummary.cmake +++ b/cmake/ConfigureSummary.cmake @@ -150,6 +150,13 @@ function( NEST_PRINT_CONFIG_SUMMARY ) message( "Detailed timers : No" ) endif () + message( "" ) + if ( THREADED_TIMERS ) + message( "Threaded timers : Yes" ) + else () + message( "Threaded timers : No" ) + endif () + message( "" ) if ( HAVE_MUSIC ) message( "Use MUSIC : Yes (MUSIC ${MUSIC_VERSION})" ) diff --git a/cmake/ProcessOptions.cmake b/cmake/ProcessOptions.cmake index 09498146d1..c5170a3940 100644 --- a/cmake/ProcessOptions.cmake +++ b/cmake/ProcessOptions.cmake @@ -462,6 +462,13 @@ function( NEST_PROCESS_WITH_DETAILED_TIMERS ) endif () endfunction() +function( NEST_PROCESS_WITH_THREADED_TIMERS ) + set( THREADED_TIMERS OFF PARENT_SCOPE ) + if ( ${with-threaded-timers} STREQUAL "ON" ) + set( THREADED_TIMERS ON PARENT_SCOPE ) + endif () +endfunction() + function( NEST_PROCESS_WITH_LIBNEUROSIM ) # Find libneurosim set( HAVE_LIBNEUROSIM OFF PARENT_SCOPE ) diff --git a/doc/htmldoc/installation/cmake_options.rst b/doc/htmldoc/installation/cmake_options.rst index a1283d537c..e118910d01 100644 --- a/doc/htmldoc/installation/cmake_options.rst +++ b/doc/htmldoc/installation/cmake_options.rst @@ -102,7 +102,7 @@ For more details, see the :ref:`Python binding ` section be .. _performance_cmake: Maximize performance, reduce energy consumption -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following options help to optimize NEST for maximal performance and thus reduced energy consumption. @@ -126,7 +126,7 @@ The following options help to optimize NEST for maximal performance and thus red in place. * Using ``-march=native`` requires that you build NEST on the same CPU architecture as you will use to run it. * For the technically minded: Even just using ``-O3`` removes some ``assert()`` statements from NEST since we - have wrapped some of them in functions, which get eliminated due to interprocedural optimization. + have wrapped some of them in functions, which get eliminated due to interprocedural optimization. @@ -197,6 +197,9 @@ NEST properties +-----------------------------------------------+----------------------------------------------------------------+ | ``-Dtics_per_step=[number]`` | Specify resolution [default=100 tics per step]. | +-----------------------------------------------+----------------------------------------------------------------+ +| ``-Dwith-threaded-timers=[OFF|ON]`` | Build with one internal timer per thread [default=ON]. | +| | Multi-threaded timers can affect the performance. | ++-----------------------------------------------+----------------------------------------------------------------+ | ``-Dwith-detailed-timers=[OFF|ON]`` | Build with detailed internal time measurements [default=OFF]. | | | Detailed timers can affect the performance. | +-----------------------------------------------+----------------------------------------------------------------+ diff --git a/doc/htmldoc/nest_behavior/built-in_timers.rst b/doc/htmldoc/nest_behavior/built-in_timers.rst index 32c88d339f..e0d5f6069a 100644 --- a/doc/htmldoc/nest_behavior/built-in_timers.rst +++ b/doc/htmldoc/nest_behavior/built-in_timers.rst @@ -86,7 +86,7 @@ available as kernel attributes: +--------------------------------+----------------------------------+----------------------------------+ |``time_communicate_target_data``|Cumulative time for core MPI |``time_gather_target_data`` | | |communication when gathering | | -| |target data | | +| |target data | | +--------------------------------+----------------------------------+----------------------------------+ |``time_update`` |Time for neuron update |``time_simulate`` | +--------------------------------+----------------------------------+----------------------------------+ diff --git a/libnestutil/CMakeLists.txt b/libnestutil/CMakeLists.txt index 3700edea51..0000aaac77 100644 --- a/libnestutil/CMakeLists.txt +++ b/libnestutil/CMakeLists.txt @@ -31,7 +31,6 @@ set( nestutil_sources numerics.h numerics.cpp regula_falsi.h sort.h - stopwatch.h stopwatch.cpp string_utils.h vector_util.h ) diff --git a/libnestutil/config.h.in b/libnestutil/config.h.in index 08b4251560..ddf3921773 100644 --- a/libnestutil/config.h.in +++ b/libnestutil/config.h.in @@ -182,6 +182,9 @@ /* Whether to enable detailed NEST internal timers */ #cmakedefine TIMER_DETAILED 1 +/* Whether to use one NEST internal timer per thread */ +#cmakedefine THREADED_TIMERS 1 + /* Whether to do full logging */ #cmakedefine ENABLE_FULL_LOGGING 1 diff --git a/nestkernel/CMakeLists.txt b/nestkernel/CMakeLists.txt index 685fb741aa..9f1c6fbc67 100644 --- a/nestkernel/CMakeLists.txt +++ b/nestkernel/CMakeLists.txt @@ -116,6 +116,7 @@ set ( nestkernel_sources stimulation_backend.h buffer_resize_log.h buffer_resize_log.cpp nest_extension_interface.h + stopwatch.h stopwatch.cpp ) diff --git a/nestkernel/connection_manager.cpp b/nestkernel/connection_manager.cpp index 1d47802ff9..ebe87e07c2 100644 --- a/nestkernel/connection_manager.cpp +++ b/nestkernel/connection_manager.cpp @@ -1800,7 +1800,9 @@ nest::ConnectionManager::collect_compressed_spike_data( const size_t tid ) } // of omp single; implicit barrier source_table_.collect_compressible_sources( tid ); + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp single { source_table_.fill_compressed_spike_data( compressed_spike_data_ ); diff --git a/nestkernel/connection_manager.h b/nestkernel/connection_manager.h index e6c3888dff..edba8ca4ec 100644 --- a/nestkernel/connection_manager.h +++ b/nestkernel/connection_manager.h @@ -461,7 +461,7 @@ class ConnectionManager : public ManagerInterface // public stop watch for benchmarking purposes // start and stop in high-level connect functions in nestmodule.cpp and nest.cpp - Stopwatch sw_construction_connect; + SingleStopwatch sw_construction_connect; const std::vector< SpikeData >& get_compressed_spike_data( const synindex syn_id, const size_t idx ); diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index dbb98504d5..7bb94dddd2 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -429,6 +429,9 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer #ifdef TIMER_DETAILED { sw_collocate_spike_data_.stop(); +#ifdef HAVE_MPI + MPI_Barrier( kernel().mpi_manager.get_communicator() ); +#endif sw_communicate_spike_data_.start(); } #endif @@ -811,7 +814,9 @@ EventDeliveryManager::gather_target_data( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; (no barrier) + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), tid ); kernel().connection_manager.restore_source_table_entry_point( tid ); @@ -826,12 +831,17 @@ EventDeliveryManager::gather_target_data( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } kernel().connection_manager.save_source_table_entry_point( tid ); + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), tid ); kernel().connection_manager.clean_source_table( tid ); #pragma omp master { #ifdef TIMER_DETAILED +#ifdef HAVE_MPI + MPI_Barrier( kernel().mpi_manager.get_communicator() ); +#endif sw_communicate_target_data_.start(); #endif kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); @@ -883,7 +893,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; no barrier + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), tid ); TargetSendBufferPosition send_buffer_position( assigned_ranks, kernel().mpi_manager.get_send_recv_count_target_data_per_rank() ); @@ -898,11 +910,16 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp master { #ifdef TIMER_DETAILED +#ifdef HAVE_MPI + MPI_Barrier( kernel().mpi_manager.get_communicator() ); +#endif sw_communicate_target_data_.start(); #endif kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); @@ -925,7 +942,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) { buffer_size_target_data_has_changed_ = kernel().mpi_manager.increase_buffer_size_target_data(); } // of omp master (no barrier) + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), tid ); } } // of while diff --git a/nestkernel/event_delivery_manager.h b/nestkernel/event_delivery_manager.h index 4f49645077..f715eca1de 100644 --- a/nestkernel/event_delivery_manager.h +++ b/nestkernel/event_delivery_manager.h @@ -469,9 +469,9 @@ class EventDeliveryManager : public ManagerInterface #ifdef TIMER_DETAILED // private stop watches for benchmarking purposes // (intended for internal core developers, not for use in the public API) - Stopwatch sw_collocate_spike_data_; - Stopwatch sw_communicate_spike_data_; - Stopwatch sw_communicate_target_data_; + SingleStopwatch sw_collocate_spike_data_; + SingleStopwatch sw_communicate_spike_data_; + SingleStopwatch sw_communicate_target_data_; #endif }; diff --git a/nestkernel/kernel_manager.cpp b/nestkernel/kernel_manager.cpp index 2e407b249a..cfa850e179 100644 --- a/nestkernel/kernel_manager.cpp +++ b/nestkernel/kernel_manager.cpp @@ -162,6 +162,12 @@ nest::KernelManager::change_number_of_threads( size_t new_num_threads ) // is in place again, we can tell modules to re-register the components // they provide. module_manager.reinitialize_dynamic_modules(); + + // Prepare timers + kernel().simulation_manager.reset_timers_for_preparation(); + kernel().simulation_manager.reset_timers_for_dynamics(); + kernel().event_delivery_manager.reset_timers_for_preparation(); + kernel().event_delivery_manager.reset_timers_for_dynamics(); } void diff --git a/nestkernel/mpi_manager.cpp b/nestkernel/mpi_manager.cpp index b7a348322e..2b69dd64ac 100644 --- a/nestkernel/mpi_manager.cpp +++ b/nestkernel/mpi_manager.cpp @@ -834,7 +834,7 @@ nest::MPIManager::time_communicate( int num_bytes, int samples ) std::vector< unsigned int > test_send_buffer( packet_length ); std::vector< unsigned int > test_recv_buffer( packet_length * get_num_processes() ); // start time measurement here - Stopwatch foo; + SingleStopwatch foo; foo.start(); for ( int i = 0; i < samples; ++i ) { @@ -870,7 +870,7 @@ nest::MPIManager::time_communicatev( int num_bytes, int samples ) } // start time measurement here - Stopwatch foo; + SingleStopwatch foo; foo.start(); for ( int i = 0; i < samples; ++i ) { @@ -898,7 +898,7 @@ nest::MPIManager::time_communicate_offgrid( int num_bytes, int samples ) std::vector< OffGridSpike > test_send_buffer( packet_length ); std::vector< OffGridSpike > test_recv_buffer( packet_length * get_num_processes() ); // start time measurement here - Stopwatch foo; + SingleStopwatch foo; foo.start(); for ( int i = 0; i < samples; ++i ) { @@ -932,7 +932,7 @@ nest::MPIManager::time_communicate_alltoall( int num_bytes, int samples ) std::vector< unsigned int > test_send_buffer( total_packet_length ); std::vector< unsigned int > test_recv_buffer( total_packet_length ); // start time measurement here - Stopwatch foo; + SingleStopwatch foo; foo.start(); for ( int i = 0; i < samples; ++i ) { @@ -969,7 +969,7 @@ nest::MPIManager::time_communicate_alltoallv( int num_bytes, int samples ) } // start time measurement here - Stopwatch foo; + SingleStopwatch foo; foo.start(); for ( int i = 0; i < samples; ++i ) { diff --git a/nestkernel/nest_names.cpp b/nestkernel/nest_names.cpp index c897a9e6cf..ee5e6f1c8d 100644 --- a/nestkernel/nest_names.cpp +++ b/nestkernel/nest_names.cpp @@ -606,6 +606,7 @@ const Name time_deliver_spike_data( "time_deliver_spike_data" ); const Name time_gather_secondary_data( "time_gather_secondary_data" ); const Name time_gather_spike_data( "time_gather_spike_data" ); const Name time_gather_target_data( "time_gather_target_data" ); +const Name time_idle( "time_idle" ); const Name time_in_steps( "time_in_steps" ); const Name time_simulate( "time_simulate" ); const Name time_update( "time_update" ); diff --git a/nestkernel/nest_names.h b/nestkernel/nest_names.h index b6d964e04c..9e9def60cd 100644 --- a/nestkernel/nest_names.h +++ b/nestkernel/nest_names.h @@ -634,6 +634,7 @@ extern const Name time_deliver_spike_data; extern const Name time_gather_secondary_data; extern const Name time_gather_spike_data; extern const Name time_gather_target_data; +extern const Name time_idle; extern const Name time_in_steps; extern const Name time_simulate; extern const Name time_update; diff --git a/nestkernel/node_manager.cpp b/nestkernel/node_manager.cpp index 5e984ca7fa..9336f30459 100644 --- a/nestkernel/node_manager.cpp +++ b/nestkernel/node_manager.cpp @@ -31,8 +31,6 @@ #include "logging.h" // Includes from nestkernel: -#include "event_delivery_manager.h" -#include "genericmodel.h" #include "kernel_manager.h" #include "model.h" #include "model_manager_impl.h" diff --git a/nestkernel/node_manager.h b/nestkernel/node_manager.h index 86044911b3..ba53f4cc5a 100644 --- a/nestkernel/node_manager.h +++ b/nestkernel/node_manager.h @@ -356,7 +356,7 @@ class NodeManager : public ManagerInterface std::vector< std::shared_ptr< WrappedThreadException > > exceptions_raised_; // private stop watch for benchmarking purposes - Stopwatch sw_construction_create_; + SingleStopwatch sw_construction_create_; }; inline size_t diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index 267f753298..a7e99bd29c 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -64,6 +64,7 @@ PerThreadBoolIndicator::initialize( const size_t num_threads, const bool status bool PerThreadBoolIndicator::all_false() const { + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); // We need two barriers here to ensure that no thread can continue and change the result // before all threads have determined the result. #pragma omp barrier @@ -71,33 +72,43 @@ PerThreadBoolIndicator::all_false() const // before all threads have determined the result. bool ret = ( are_true_ == 0 ); #pragma omp barrier + + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); return ret; } bool PerThreadBoolIndicator::all_true() const { + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); #pragma omp barrier bool ret = ( are_true_ == size_ ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); return ret; } bool PerThreadBoolIndicator::any_false() const { + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); #pragma omp barrier bool ret = ( are_true_ < size_ ); #pragma omp barrier + + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); return ret; } bool PerThreadBoolIndicator::any_true() const { + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); #pragma omp barrier bool ret = ( are_true_ > 0 ); #pragma omp barrier + + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), kernel().vp_manager.get_thread_id() ); return ret; } diff --git a/nestkernel/simulation_manager.cpp b/nestkernel/simulation_manager.cpp index 9e0da05602..e15fec3cb8 100644 --- a/nestkernel/simulation_manager.cpp +++ b/nestkernel/simulation_manager.cpp @@ -116,8 +116,8 @@ nest::SimulationManager::finalize( const bool ) void nest::SimulationManager::reset_timers_for_preparation() { - sw_communicate_prepare_.reset(); #ifdef TIMER_DETAILED + sw_communicate_prepare_.reset(); sw_gather_target_data_.reset(); #endif } @@ -132,6 +132,7 @@ nest::SimulationManager::reset_timers_for_dynamics() sw_update_.reset(); sw_deliver_spike_data_.reset(); sw_deliver_secondary_data_.reset(); + sw_idle_.reset(); #endif } @@ -478,14 +479,15 @@ nest::SimulationManager::get_status( DictionaryDatum& d ) def< double >( d, names::max_update_time, max_update_time_ ); def< double >( d, names::time_simulate, sw_simulate_.elapsed() ); - def< double >( d, names::time_communicate_prepare, sw_communicate_prepare_.elapsed() ); + OUTPUT_TIMER( d, names::time_communicate_prepare, sw_communicate_prepare_ ); #ifdef TIMER_DETAILED - def< double >( d, names::time_gather_spike_data, sw_gather_spike_data_.elapsed() ); - def< double >( d, names::time_gather_secondary_data, sw_gather_secondary_data_.elapsed() ); - def< double >( d, names::time_update, sw_update_.elapsed() ); - def< double >( d, names::time_gather_target_data, sw_gather_target_data_.elapsed() ); - def< double >( d, names::time_deliver_spike_data, sw_deliver_spike_data_.elapsed() ); - def< double >( d, names::time_deliver_secondary_data, sw_deliver_secondary_data_.elapsed() ); + OUTPUT_TIMER( d, names::time_gather_spike_data, sw_gather_spike_data_ ); + OUTPUT_TIMER( d, names::time_gather_secondary_data, sw_gather_secondary_data_ ); + OUTPUT_TIMER( d, names::time_update, sw_update_ ); + OUTPUT_TIMER( d, names::time_gather_target_data, sw_gather_target_data_ ); + OUTPUT_TIMER( d, names::time_deliver_spike_data, sw_deliver_spike_data_ ); + OUTPUT_TIMER( d, names::time_deliver_secondary_data, sw_deliver_secondary_data_ ); + OUTPUT_TIMER( d, names::time_idle, sw_idle_ ); #endif def< double >( d, names::eprop_update_interval, eprop_update_interval_ ); def< double >( d, names::eprop_learning_window, eprop_learning_window_ ); @@ -523,6 +525,7 @@ nest::SimulationManager::prepare() // this call sets the member variables kernel().connection_manager.update_delay_extrema_(); kernel().event_delivery_manager.init_moduli(); + kernel().node_manager.ensure_valid_thread_local_ids(); // if at the beginning of a simulation, set up spike buffers if ( not simulated_ ) @@ -738,17 +741,21 @@ nest::SimulationManager::call_update_() void nest::SimulationManager::update_connection_infrastructure( const size_t tid ) { + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier - if ( tid == 0 ) - { - sw_communicate_prepare_.start(); - } + DETAILED_TIMER_STOP( sw_idle_, tid ); + + DETAILED_TIMER_START( sw_communicate_prepare_, tid ); - kernel().connection_manager.restructure_connection_tables( tid ); kernel().connection_manager.sort_connections( tid ); + DETAILED_TIMER_START( sw_gather_target_data_, tid ); + kernel().connection_manager.restructure_connection_tables( tid ); kernel().connection_manager.collect_compressed_spike_data( tid ); + DETAILED_TIMER_STOP( sw_gather_target_data_, tid ); + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier // wait for all threads to finish sorting + DETAILED_TIMER_STOP( sw_idle_, tid ); #pragma omp single { @@ -765,7 +772,9 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) { #pragma omp barrier kernel().connection_manager.compute_compressed_secondary_recv_buffer_positions( tid ); + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier + DETAILED_TIMER_STOP( sw_idle_, tid ); #pragma omp single { kernel().mpi_manager.communicate_recv_counts_secondary_events(); @@ -773,12 +782,7 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) } } -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_gather_target_data_.start(); - } -#endif + DETAILED_TIMER_START( sw_gather_target_data_, tid ); // communicate connection information from postsynaptic to // presynaptic side @@ -796,32 +800,23 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) kernel().event_delivery_manager.gather_target_data( tid ); } -#ifdef TIMER_DETAILED -#pragma omp barrier - if ( tid == 0 ) - { - sw_gather_target_data_.stop(); - } -#endif + DETAILED_TIMER_STOP( sw_gather_target_data_, tid ); if ( kernel().connection_manager.secondary_connections_exist() ) { kernel().connection_manager.compress_secondary_send_buffer_pos( tid ); } + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier + DETAILED_TIMER_STOP( sw_idle_, tid ); #pragma omp single { kernel().connection_manager.clear_compressed_spike_data_map(); kernel().node_manager.set_have_nodes_changed( false ); kernel().connection_manager.unset_connections_have_changed(); } - -#pragma omp barrier - if ( tid == 0 ) - { - sw_communicate_prepare_.stop(); - } + DETAILED_TIMER_STOP( sw_communicate_prepare_, tid ); } bool @@ -871,48 +866,31 @@ nest::SimulationManager::update_() // from the time step preceding the arrival of the spike triggering the weight update. if ( kernel().connection_manager.secondary_connections_exist() ) { -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_secondary_data_.start(); - } -#endif + DETAILED_TIMER_START( sw_deliver_secondary_data_, tid ); kernel().event_delivery_manager.deliver_secondary_events( tid, false ); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_secondary_data_.stop(); - } -#endif + DETAILED_TIMER_STOP( sw_deliver_secondary_data_, tid ); } if ( kernel().connection_manager.has_primary_connections() ) { -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_spike_data_.start(); - } -#endif + DETAILED_TIMER_START( sw_deliver_spike_data_, tid ); // Deliver spikes from receive buffer to ring buffers. kernel().event_delivery_manager.deliver_events( tid ); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_spike_data_.stop(); - } -#endif + + DETAILED_TIMER_STOP( sw_deliver_spike_data_, tid ); } #ifdef HAVE_MUSIC -// advance the time of music by one step (min_delay * h) must -// be done after deliver_events_() since it calls -// music_event_out_proxy::handle(), which hands the spikes over to -// MUSIC *before* MUSIC time is advanced + // advance the time of music by one step (min_delay * h) must + // be done after deliver_events_() since it calls + // music_event_out_proxy::handle(), which hands the spikes over to + // MUSIC *before* MUSIC time is advanced -// wait until all threads are done -> synchronize + // wait until all threads are done -> synchronize + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier + DETAILED_TIMER_STOP( sw_idle_, tid ); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block #pragma omp master @@ -972,8 +950,10 @@ nest::SimulationManager::update_() // add done value of thread p to done vector #pragma omp critical done.push_back( done_p ); -// parallel section ends, wait until all threads are done -> synchronize + // parallel section ends, wait until all threads are done -> synchronize + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier + DETAILED_TIMER_STOP( sw_idle_, tid ); // the following block is executed by a single thread // the other threads wait at the end of the block @@ -1033,7 +1013,9 @@ nest::SimulationManager::update_() Node* node = i->get_node(); node->update_synaptic_elements( Time( Time::step( clock_.get_steps() + from_step_ ) ).get_ms() ); } + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier + DETAILED_TIMER_STOP( sw_idle_, tid ); #pragma omp single { kernel().sp_manager.update_structural_plasticity(); @@ -1055,14 +1037,7 @@ nest::SimulationManager::update_() } // of structural plasticity - -#ifdef TIMER_DETAILED -#pragma omp barrier - if ( tid == 0 ) - { - sw_update_.start(); - } -#endif + DETAILED_TIMER_START( sw_update_, tid ); const SparseNodeArray& thread_local_nodes = kernel().node_manager.get_local_nodes( tid ); for ( SparseNodeArray::const_iterator n = thread_local_nodes.begin(); n != thread_local_nodes.end(); ++n ) @@ -1074,46 +1049,41 @@ nest::SimulationManager::update_() } } -// parallel section ends, wait until all threads are done -> synchronize -#pragma omp barrier + DETAILED_TIMER_STOP( sw_update_, tid ); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_update_.stop(); - } -#endif + // parallel section ends, wait until all threads are done -> synchronize + DETAILED_TIMER_START( sw_idle_, tid ); +#pragma omp barrier + DETAILED_TIMER_STOP( sw_idle_, tid ); // the following block is executed by the master thread only -// the other threads are enforced to wait at the end of the block -#pragma omp master + // the other threads are enforced to wait at the end of the block + + // gather and deliver only at end of slice, i.e., end of min_delay step + if ( to_step_ == kernel().connection_manager.get_min_delay() ) { - // gather and deliver only at end of slice, i.e., end of min_delay step - if ( to_step_ == kernel().connection_manager.get_min_delay() ) + if ( kernel().connection_manager.has_primary_connections() ) { - if ( kernel().connection_manager.has_primary_connections() ) + DETAILED_TIMER_START( sw_gather_spike_data_, tid ); +#pragma omp master { -#ifdef TIMER_DETAILED - sw_gather_spike_data_.start(); -#endif - kernel().event_delivery_manager.gather_spike_data(); -#ifdef TIMER_DETAILED - sw_gather_spike_data_.stop(); -#endif } - if ( kernel().connection_manager.secondary_connections_exist() ) + DETAILED_TIMER_STOP( sw_gather_spike_data_, tid ); + } + if ( kernel().connection_manager.secondary_connections_exist() ) + { + DETAILED_TIMER_START( sw_gather_secondary_data_, tid ); +#pragma omp master { -#ifdef TIMER_DETAILED - sw_gather_secondary_data_.start(); -#endif kernel().event_delivery_manager.gather_secondary_events( true ); -#ifdef TIMER_DETAILED - sw_gather_secondary_data_.stop(); -#endif } + DETAILED_TIMER_STOP( sw_gather_secondary_data_, tid ); } + } +#pragma omp master + { advance_time_(); if ( print_time_ ) @@ -1136,8 +1106,10 @@ nest::SimulationManager::update_() // if block to avoid omp barrier if SIONLIB is not used #ifdef HAVE_SIONLIB kernel().io_manager.post_step_hook(); -// enforce synchronization after post-step activities of the recording backends + // enforce synchronization after post-step activities of the recording backends + DETAILED_TIMER_START( sw_idle_, tid ); #pragma omp barrier + DETAILED_TIMER_STOP( sw_idle_, tid ); #endif const double end_current_update = sw_simulate_.elapsed(); diff --git a/nestkernel/simulation_manager.h b/nestkernel/simulation_manager.h index 83a37cac31..6e23eb73cb 100644 --- a/nestkernel/simulation_manager.h +++ b/nestkernel/simulation_manager.h @@ -189,6 +189,14 @@ class SimulationManager : public ManagerInterface Time get_eprop_learning_window() const; bool get_eprop_reset_neurons_on_update() const; +#ifdef TIMER_DETAILED + Stopwatch& + get_idle_stopwatch() + { + return sw_idle_; + } +#endif + private: void call_update_(); //!< actually run simulation, aka wrap update_ void update_(); //! actually perform simulation @@ -228,7 +236,7 @@ class SimulationManager : public ManagerInterface double max_update_time_; //!< longest update time seen so far (seconds) // private stop watches for benchmarking purposes - Stopwatch sw_simulate_; + SingleStopwatch sw_simulate_; Stopwatch sw_communicate_prepare_; #ifdef TIMER_DETAILED // intended for internal core developers, not for use in the public API @@ -238,6 +246,7 @@ class SimulationManager : public ManagerInterface Stopwatch sw_gather_target_data_; Stopwatch sw_deliver_spike_data_; Stopwatch sw_deliver_secondary_data_; + Stopwatch sw_idle_; #endif double eprop_update_interval_; diff --git a/nestkernel/source_table.cpp b/nestkernel/source_table.cpp index b5e7c49c73..e2fb36d1fe 100644 --- a/nestkernel/source_table.cpp +++ b/nestkernel/source_table.cpp @@ -229,7 +229,9 @@ nest::SourceTable::compute_buffer_pos_for_unique_secondary_sources( const size_t } } } + DETAILED_TIMER_START( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp barrier + DETAILED_TIMER_STOP( kernel().simulation_manager.get_idle_stopwatch(), tid ); #pragma omp single { diff --git a/nestkernel/stimulation_backend_mpi.cpp b/nestkernel/stimulation_backend_mpi.cpp index 189207f08f..8f908d24ce 100644 --- a/nestkernel/stimulation_backend_mpi.cpp +++ b/nestkernel/stimulation_backend_mpi.cpp @@ -159,7 +159,7 @@ nest::StimulationBackendMPI::prepare() } // Add the id of device of the other thread in the vector_id_device and update the count of all device - for ( int id_thread = 0; id_thread < kernel().vp_manager.get_num_threads(); id_thread++ ) + for ( size_t id_thread = 0; id_thread < kernel().vp_manager.get_num_threads(); id_thread++ ) { // don't do it again for the master thread if ( id_thread != thread_id_master ) diff --git a/libnestutil/stopwatch.cpp b/nestkernel/stopwatch.cpp similarity index 69% rename from libnestutil/stopwatch.cpp rename to nestkernel/stopwatch.cpp index ec7a2b7676..f80dc17352 100644 --- a/libnestutil/stopwatch.cpp +++ b/nestkernel/stopwatch.cpp @@ -21,13 +21,25 @@ */ #include "stopwatch.h" +#include "kernel_manager.h" namespace nest { std::ostream& -operator<<( std::ostream& os, const Stopwatch& stopwatch ) +operator<<( std::ostream& os, const SingleStopwatch& stopwatch ) { - stopwatch.print( "", Stopwatch::SECONDS, os ); + stopwatch.print( "", SingleStopwatch::SECONDS, os ); return os; } + +void +StopwatchArray::reset() +{ + const size_t num_threads = kernel().vp_manager.get_num_threads(); + timers_.resize( num_threads ); + for ( size_t i = 0; i < num_threads; ++i ) + { + timers_[ i ].reset(); + } +} } diff --git a/libnestutil/stopwatch.h b/nestkernel/stopwatch.h similarity index 74% rename from libnestutil/stopwatch.h rename to nestkernel/stopwatch.h index d30474faa2..28d628069d 100644 --- a/libnestutil/stopwatch.h +++ b/nestkernel/stopwatch.h @@ -29,12 +29,43 @@ // C++ includes: #include #include +#include namespace nest { +/** + * Wrap timer code for easy enabling/disabling via -Dwith-threaded-timers=ON/OFF. + */ +#ifdef THREADED_TIMERS +#define TIMER( timer, tid ) timer[ tid ] +#else +#define TIMER( timer, tid ) \ + if ( tid == 0 ) \ + timer +#endif + +#ifdef THREADED_TIMERS +#define OUTPUT_TIMER( dict, name, timer ) def< ArrayDatum >( dict, name, ArrayDatum( timer.elapsed() ) ) +#else +#define OUTPUT_TIMER( dict, name, timer ) def< double >( dict, name, timer.elapsed() ) +#endif + +#ifdef TIMER_DETAILED +#define DETAILED_TIMER_START( timer, tid ) TIMER( timer, tid ).start(); +#else +#define DETAILED_TIMER_START( timer, tid ) +#endif + +#ifdef TIMER_DETAILED +#define DETAILED_TIMER_STOP( timer, tid ) TIMER( timer, tid ).stop(); +#else +#define DETAILED_TIMER_STOP( timer, tid ) +#endif + +// TODO JV: Update docs /*********************************************************************** - * Stopwatch * + * SingleStopwatch * * Accumulates time between start and stop, and provides * * the elapsed time with different time units. * * * @@ -43,7 +74,7 @@ namespace nest * - Let each thread have its own stopwatch. * * * * Usage example: * - * Stopwatch x; * + * SingleStopwatch x; * * x.start(); * * // ... do computations for 15.34 sec * * x.stop(); // only pauses stopwatch * @@ -59,11 +90,11 @@ namespace nest * // ^ intermediate timing without stopping the stopwatch * * // ... more computations 1.7643 min * * x.stop(); * - * x.print("Time needed ", Stopwatch::MINUTES, std::cerr); * + * x.print("Time needed ", SingleStopwatch::MINUTES, std::cerr); * * // > Time needed 1,8593 min. (on cerr) * * // other units and output streams possible * ***********************************************************************/ -class Stopwatch +class SingleStopwatch { public: typedef size_t timestamp_t; @@ -84,7 +115,7 @@ class Stopwatch /** * Creates a stopwatch that is not running. */ - Stopwatch() + SingleStopwatch() { reset(); } @@ -121,7 +152,7 @@ class Stopwatch * runtime is added. If you want only the last measurement, you * have to reset the timer, before stating the measurement. * Does not change the running state. - * In contrast to Stopwatch::elapsed(), only the timestamp is returned, + * In contrast to SingleStopwatch::elapsed(), only the timestamp is returned, * that is the number if microseconds as an integer. */ timestamp_t elapsed_timestamp() const; @@ -140,7 +171,7 @@ class Stopwatch * Convenient method for writing time in seconds * to some ostream. */ - friend std::ostream& operator<<( std::ostream& os, const Stopwatch& stopwatch ); + friend std::ostream& operator<<( std::ostream& os, const SingleStopwatch& stopwatch ); private: #ifndef DISABLE_TIMING @@ -156,13 +187,13 @@ class Stopwatch }; inline bool -Stopwatch::correct_timeunit( timeunit_t t ) +SingleStopwatch::correct_timeunit( timeunit_t t ) { return t == MICROSEC or t == MILLISEC or t == SECONDS or t == MINUTES or t == HOURS or t == DAYS; } inline void -nest::Stopwatch::start() +nest::SingleStopwatch::start() { #ifndef DISABLE_TIMING if ( not isRunning() ) @@ -175,7 +206,7 @@ nest::Stopwatch::start() } inline void -nest::Stopwatch::stop() +nest::SingleStopwatch::stop() { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -187,7 +218,7 @@ nest::Stopwatch::stop() } inline bool -nest::Stopwatch::isRunning() const +nest::SingleStopwatch::isRunning() const { #ifndef DISABLE_TIMING return _running; @@ -197,7 +228,7 @@ nest::Stopwatch::isRunning() const } inline double -nest::Stopwatch::elapsed( timeunit_t timeunit ) const +nest::SingleStopwatch::elapsed( timeunit_t timeunit ) const { #ifndef DISABLE_TIMING assert( correct_timeunit( timeunit ) ); @@ -207,8 +238,8 @@ nest::Stopwatch::elapsed( timeunit_t timeunit ) const #endif } -inline nest::Stopwatch::timestamp_t -nest::Stopwatch::elapsed_timestamp() const +inline nest::SingleStopwatch::timestamp_t +nest::SingleStopwatch::elapsed_timestamp() const { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -227,7 +258,7 @@ nest::Stopwatch::elapsed_timestamp() const } inline void -nest::Stopwatch::reset() +nest::SingleStopwatch::reset() { #ifndef DISABLE_TIMING _beg = 0; // invariant: _end >= _beg @@ -238,7 +269,7 @@ nest::Stopwatch::reset() } inline void -nest::Stopwatch::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const +nest::SingleStopwatch::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const { #ifndef DISABLE_TIMING assert( correct_timeunit( timeunit ) ); @@ -273,8 +304,8 @@ nest::Stopwatch::print( const char* msg, timeunit_t timeunit, std::ostream& os ) #endif } -inline nest::Stopwatch::timestamp_t -nest::Stopwatch::get_timestamp() +inline nest::SingleStopwatch::timestamp_t +nest::SingleStopwatch::get_timestamp() { // works with: // * hambach (Linux 2.6.32 x86_64) @@ -282,9 +313,44 @@ nest::Stopwatch::get_timestamp() // * MacOS 10.9 struct timeval now; gettimeofday( &now, static_cast< struct timezone* >( nullptr ) ); - return ( nest::Stopwatch::timestamp_t ) now.tv_usec - + ( nest::Stopwatch::timestamp_t ) now.tv_sec * nest::Stopwatch::SECONDS; + return ( nest::SingleStopwatch::timestamp_t ) now.tv_usec + + ( nest::SingleStopwatch::timestamp_t ) now.tv_sec * nest::SingleStopwatch::SECONDS; +} + +class StopwatchArray +{ +public: + void reset(); + + std::vector< double > elapsed(); + + SingleStopwatch& + operator[]( const size_t tid ) + { + return timers_[ tid ]; + } + +private: + std::vector< SingleStopwatch > timers_; +}; + +inline std::vector< double > +StopwatchArray::elapsed() +{ + std::vector< double > times; + times.reserve( timers_.size() ); + for ( auto& timer : timers_ ) + { + times.push_back( timer.elapsed() ); + } + return times; } +#ifdef THREADED_TIMERS +typedef StopwatchArray Stopwatch; +#else +typedef SingleStopwatch Stopwatch; +#endif + } /* namespace timer */ #endif /* STOPWATCH_H */ From 18149a8b5094b37d918d6c1df5b390f1f02d1930 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Tue, 10 Dec 2024 17:46:24 +0100 Subject: [PATCH 02/11] Made timers much more accessible --- CMakeLists.txt | 2 + cmake/ConfigureSummary.cmake | 7 + cmake/ProcessOptions.cmake | 7 + .../guidelines/coding_guidelines_cpp.rst | 4 +- doc/htmldoc/installation/cmake_options.rst | 7 +- doc/htmldoc/nest_behavior/built-in_timers.rst | 8 +- libnestutil/CMakeLists.txt | 1 - libnestutil/config.h.in | 3 + libnestutil/stopwatch.cpp | 33 -- nestkernel/CMakeLists.txt | 1 + nestkernel/connection_manager.cpp | 3 + nestkernel/connection_manager.h | 2 +- nestkernel/event_delivery_manager.cpp | 66 ++-- nestkernel/event_delivery_manager.h | 9 +- nestkernel/kernel_manager.cpp | 20 ++ nestkernel/kernel_manager.h | 16 + nestkernel/mpi_manager.cpp | 42 +-- nestkernel/nest_names.cpp | 2 + nestkernel/nest_names.h | 2 + nestkernel/node_manager.cpp | 4 +- nestkernel/node_manager.h | 2 +- nestkernel/per_thread_bool_indicator.cpp | 12 + nestkernel/simulation_manager.cpp | 175 +++++------ nestkernel/simulation_manager.h | 18 +- nestkernel/source_table.cpp | 3 + nestkernel/stimulation_backend_mpi.cpp | 2 +- {libnestutil => nestkernel}/stopwatch.h | 297 +++++++++++++++--- nestkernel/stopwatch_impl.h | 106 +++++++ 28 files changed, 590 insertions(+), 264 deletions(-) delete mode 100644 libnestutil/stopwatch.cpp rename {libnestutil => nestkernel}/stopwatch.h (52%) create mode 100644 nestkernel/stopwatch_impl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f060a5c933..542f233df2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,7 @@ set( with-models OFF CACHE STRING "The models to include as a semicolon-separate set( tics_per_ms "1000.0" CACHE STRING "Specify elementary unit of time [default=1000 tics per ms]." ) set( tics_per_step "100" CACHE STRING "Specify resolution [default=100 tics per step]." ) set( with-detailed-timers OFF CACHE STRING "Build with detailed internal time measurements [default=OFF]. Detailed timers can affect the performance." ) +set( with-threaded-timers ON CACHE STRING "Build with one internal timer per thread [default=ON]. Multi-threaded timers can affect the performance." ) set( target-bits-split "standard" CACHE STRING "Split of the 64-bit target neuron identifier type [default='standard']. 'standard' is recommended for most users. If running on more than 262144 MPI processes or more than 512 threads, change to 'hpc'." ) # generic build configuration @@ -143,6 +144,7 @@ nest_process_with_gsl() nest_process_with_openmp() nest_process_with_mpi() nest_process_with_detailed_timers() +nest_process_with_threaded_timers() nest_process_with_libneurosim() nest_process_with_music() nest_process_with_sionlib() diff --git a/cmake/ConfigureSummary.cmake b/cmake/ConfigureSummary.cmake index ea9610aa9a..c0b7e24e8b 100644 --- a/cmake/ConfigureSummary.cmake +++ b/cmake/ConfigureSummary.cmake @@ -150,6 +150,13 @@ function( NEST_PRINT_CONFIG_SUMMARY ) message( "Detailed timers : No" ) endif () + message( "" ) + if ( THREADED_TIMERS ) + message( "Threaded timers : Yes" ) + else () + message( "Threaded timers : No" ) + endif () + message( "" ) if ( HAVE_MUSIC ) message( "Use MUSIC : Yes (MUSIC ${MUSIC_VERSION})" ) diff --git a/cmake/ProcessOptions.cmake b/cmake/ProcessOptions.cmake index 09498146d1..c5170a3940 100644 --- a/cmake/ProcessOptions.cmake +++ b/cmake/ProcessOptions.cmake @@ -462,6 +462,13 @@ function( NEST_PROCESS_WITH_DETAILED_TIMERS ) endif () endfunction() +function( NEST_PROCESS_WITH_THREADED_TIMERS ) + set( THREADED_TIMERS OFF PARENT_SCOPE ) + if ( ${with-threaded-timers} STREQUAL "ON" ) + set( THREADED_TIMERS ON PARENT_SCOPE ) + endif () +endfunction() + function( NEST_PROCESS_WITH_LIBNEUROSIM ) # Find libneurosim set( HAVE_LIBNEUROSIM OFF PARENT_SCOPE ) diff --git a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst index 83460ab19c..68f2acd1b5 100644 --- a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst +++ b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst @@ -637,12 +637,12 @@ For example, the ``stopwatch.h`` file could look like: } /* namespace timer */ #endif /* STOPWATCH_H */ -And the corresponding ``stopwatch.cpp``: +And the corresponding ``stopwatch_impl.h``: .. code:: cpp /* - * stopwatch.cpp + * stopwatch_impl.h * * This file is part of NEST. * diff --git a/doc/htmldoc/installation/cmake_options.rst b/doc/htmldoc/installation/cmake_options.rst index a1283d537c..e118910d01 100644 --- a/doc/htmldoc/installation/cmake_options.rst +++ b/doc/htmldoc/installation/cmake_options.rst @@ -102,7 +102,7 @@ For more details, see the :ref:`Python binding ` section be .. _performance_cmake: Maximize performance, reduce energy consumption -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The following options help to optimize NEST for maximal performance and thus reduced energy consumption. @@ -126,7 +126,7 @@ The following options help to optimize NEST for maximal performance and thus red in place. * Using ``-march=native`` requires that you build NEST on the same CPU architecture as you will use to run it. * For the technically minded: Even just using ``-O3`` removes some ``assert()`` statements from NEST since we - have wrapped some of them in functions, which get eliminated due to interprocedural optimization. + have wrapped some of them in functions, which get eliminated due to interprocedural optimization. @@ -197,6 +197,9 @@ NEST properties +-----------------------------------------------+----------------------------------------------------------------+ | ``-Dtics_per_step=[number]`` | Specify resolution [default=100 tics per step]. | +-----------------------------------------------+----------------------------------------------------------------+ +| ``-Dwith-threaded-timers=[OFF|ON]`` | Build with one internal timer per thread [default=ON]. | +| | Multi-threaded timers can affect the performance. | ++-----------------------------------------------+----------------------------------------------------------------+ | ``-Dwith-detailed-timers=[OFF|ON]`` | Build with detailed internal time measurements [default=OFF]. | | | Detailed timers can affect the performance. | +-----------------------------------------------+----------------------------------------------------------------+ diff --git a/doc/htmldoc/nest_behavior/built-in_timers.rst b/doc/htmldoc/nest_behavior/built-in_timers.rst index 32c88d339f..97ab79e383 100644 --- a/doc/htmldoc/nest_behavior/built-in_timers.rst +++ b/doc/htmldoc/nest_behavior/built-in_timers.rst @@ -22,7 +22,7 @@ The following basic time measurements are available: |Name |Explanation | +=============================+==================================+ |``time_construction_create`` |Cumulative time NEST spent | -| |creating neurons and devices | +| |creating neurons and devices | +-----------------------------+----------------------------------+ |``time_construction_connect``|Cumulative time NEST spent | | |creating connections | @@ -54,7 +54,7 @@ attributes are: +-----------------------+----------------------------------+ |``local_spike_counter``|Number of spikes emitted by the | | |neurons represented on this MPI | -| |rank during the last | +| |rank during the last | | |``Simulate()`` | +-----------------------+----------------------------------+ @@ -82,11 +82,11 @@ available as kernel attributes: +================================+==================================+==================================+ |``time_gather_target_data`` |Cumulative time for communicating |``time_communicate_prepare`` | | |connection information from | | -| |postsynaptic to presynaptic side | | +| |postsynaptic to presynaptic side | | +--------------------------------+----------------------------------+----------------------------------+ |``time_communicate_target_data``|Cumulative time for core MPI |``time_gather_target_data`` | | |communication when gathering | | -| |target data | | +| |target data | | +--------------------------------+----------------------------------+----------------------------------+ |``time_update`` |Time for neuron update |``time_simulate`` | +--------------------------------+----------------------------------+----------------------------------+ diff --git a/libnestutil/CMakeLists.txt b/libnestutil/CMakeLists.txt index 3700edea51..0000aaac77 100644 --- a/libnestutil/CMakeLists.txt +++ b/libnestutil/CMakeLists.txt @@ -31,7 +31,6 @@ set( nestutil_sources numerics.h numerics.cpp regula_falsi.h sort.h - stopwatch.h stopwatch.cpp string_utils.h vector_util.h ) diff --git a/libnestutil/config.h.in b/libnestutil/config.h.in index 08b4251560..ddf3921773 100644 --- a/libnestutil/config.h.in +++ b/libnestutil/config.h.in @@ -182,6 +182,9 @@ /* Whether to enable detailed NEST internal timers */ #cmakedefine TIMER_DETAILED 1 +/* Whether to use one NEST internal timer per thread */ +#cmakedefine THREADED_TIMERS 1 + /* Whether to do full logging */ #cmakedefine ENABLE_FULL_LOGGING 1 diff --git a/libnestutil/stopwatch.cpp b/libnestutil/stopwatch.cpp deleted file mode 100644 index ec7a2b7676..0000000000 --- a/libnestutil/stopwatch.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * stopwatch.cpp - * - * This file is part of NEST. - * - * Copyright (C) 2004 The NEST Initiative - * - * NEST is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or - * (at your option) any later version. - * - * NEST is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with NEST. If not, see . - * - */ - -#include "stopwatch.h" - -namespace nest -{ -std::ostream& -operator<<( std::ostream& os, const Stopwatch& stopwatch ) -{ - stopwatch.print( "", Stopwatch::SECONDS, os ); - return os; -} -} diff --git a/nestkernel/CMakeLists.txt b/nestkernel/CMakeLists.txt index 685fb741aa..264f0deb5b 100644 --- a/nestkernel/CMakeLists.txt +++ b/nestkernel/CMakeLists.txt @@ -116,6 +116,7 @@ set ( nestkernel_sources stimulation_backend.h buffer_resize_log.h buffer_resize_log.cpp nest_extension_interface.h + stopwatch.h stopwatch_impl.h ) diff --git a/nestkernel/connection_manager.cpp b/nestkernel/connection_manager.cpp index 1d47802ff9..b67e2cb5df 100644 --- a/nestkernel/connection_manager.cpp +++ b/nestkernel/connection_manager.cpp @@ -55,6 +55,7 @@ #include "nest_names.h" #include "node.h" #include "sonata_connector.h" +#include "stopwatch_impl.h" #include "target_table_devices_impl.h" #include "vp_manager_impl.h" @@ -1800,7 +1801,9 @@ nest::ConnectionManager::collect_compressed_spike_data( const size_t tid ) } // of omp single; implicit barrier source_table_.collect_compressible_sources( tid ); + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); #pragma omp single { source_table_.fill_compressed_spike_data( compressed_spike_data_ ); diff --git a/nestkernel/connection_manager.h b/nestkernel/connection_manager.h index e6c3888dff..b1fbf65f1b 100644 --- a/nestkernel/connection_manager.h +++ b/nestkernel/connection_manager.h @@ -461,7 +461,7 @@ class ConnectionManager : public ManagerInterface // public stop watch for benchmarking purposes // start and stop in high-level connect functions in nestmodule.cpp and nest.cpp - Stopwatch sw_construction_connect; + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_construction_connect; const std::vector< SpikeData >& get_compressed_spike_data( const synindex syn_id, const size_t idx ); diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index dbb98504d5..bd17cd1aae 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -34,14 +34,13 @@ #include "mpi_manager_impl.h" #include "send_buffer_position.h" #include "source.h" +#include "stopwatch_impl.h" #include "vp_manager.h" #include "vp_manager_impl.h" // Includes from sli: #include "dictutils.h" -#include "compose.hpp" - namespace nest { @@ -191,11 +190,9 @@ EventDeliveryManager::get_status( DictionaryDatum& dict ) ( *dict )[ names::spike_buffer_resize_log ] = log_events; send_recv_buffer_resize_log_.to_dict( log_events ); -#ifdef TIMER_DETAILED - def< double >( dict, names::time_collocate_spike_data, sw_collocate_spike_data_.elapsed() ); - def< double >( dict, names::time_communicate_spike_data, sw_communicate_spike_data_.elapsed() ); - def< double >( dict, names::time_communicate_target_data, sw_communicate_target_data_.elapsed() ); -#endif + sw_collocate_spike_data_.output_timer( dict, names::time_collocate_spike_data ); + sw_communicate_spike_data_.output_timer( dict, names::time_communicate_spike_data ); + sw_communicate_target_data_.output_timer( dict, names::time_communicate_target_data ); } void @@ -317,18 +314,14 @@ EventDeliveryManager::reset_counters() void EventDeliveryManager::reset_timers_for_preparation() { -#ifdef TIMER_DETAILED sw_communicate_target_data_.reset(); -#endif } void EventDeliveryManager::reset_timers_for_dynamics() { -#ifdef TIMER_DETAILED sw_collocate_spike_data_.reset(); sw_communicate_spike_data_.reset(); -#endif } void @@ -400,11 +393,7 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer // Need to get new positions in case buffer size has changed SendBufferPosition send_buffer_position; -#ifdef TIMER_DETAILED - { - sw_collocate_spike_data_.start(); - } -#endif + sw_collocate_spike_data_.start(); // Set marker at end of each chunk to DEFAULT reset_complete_marker_spike_data_( send_buffer_position, send_buffer ); @@ -426,12 +415,13 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer // as all chunk-end entries, have marker DEFAULT. set_end_marker_( send_buffer_position, send_buffer, local_max_spikes_per_rank ); -#ifdef TIMER_DETAILED - { - sw_collocate_spike_data_.stop(); - sw_communicate_spike_data_.start(); - } + sw_collocate_spike_data_.stop(); +#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) + kernel().get_mpi_synchronization_stopwatch().start(); + kernel().mpi_manager.synchronize(); + kernel().get_mpi_synchronization_stopwatch().stop(); #endif + sw_communicate_spike_data_.start(); // Given that we templatize by plain vs offgrid, this if should not be necessary, but ... if ( off_grid_spiking_ ) @@ -443,11 +433,7 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer kernel().mpi_manager.communicate_spike_data_Alltoall( send_buffer, recv_buffer ); } -#ifdef TIMER_DETAILED - { - sw_communicate_spike_data_.stop(); - } -#endif + sw_communicate_spike_data_.stop(); global_max_spikes_per_rank_ = get_global_max_spikes_per_rank_( send_buffer_position, recv_buffer ); @@ -811,7 +797,9 @@ EventDeliveryManager::gather_target_data( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; (no barrier) + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); kernel().connection_manager.restore_source_table_entry_point( tid ); @@ -826,18 +814,21 @@ EventDeliveryManager::gather_target_data( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } kernel().connection_manager.save_source_table_entry_point( tid ); + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); kernel().connection_manager.clean_source_table( tid ); #pragma omp master { -#ifdef TIMER_DETAILED - sw_communicate_target_data_.start(); +#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) + kernel().get_mpi_synchronization_stopwatch().start(); + kernel().mpi_manager.synchronize(); + kernel().get_mpi_synchronization_stopwatch().stop(); #endif + sw_communicate_target_data_.start(); kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); -#ifdef TIMER_DETAILED sw_communicate_target_data_.stop(); -#endif } // of omp master (no barriers!) #pragma omp barrier @@ -883,7 +874,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; no barrier + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); TargetSendBufferPosition send_buffer_position( assigned_ranks, kernel().mpi_manager.get_send_recv_count_target_data_per_rank() ); @@ -898,17 +891,20 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); #pragma omp master { -#ifdef TIMER_DETAILED - sw_communicate_target_data_.start(); +#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) + kernel().get_mpi_synchronization_stopwatch().start(); + kernel().mpi_manager.synchronize(); + kernel().get_mpi_synchronization_stopwatch().stop(); #endif + sw_communicate_target_data_.start(); kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); -#ifdef TIMER_DETAILED sw_communicate_target_data_.stop(); -#endif } // of omp master (no barrier) #pragma omp barrier @@ -925,7 +921,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) { buffer_size_target_data_has_changed_ = kernel().mpi_manager.increase_buffer_size_target_data(); } // of omp master (no barrier) + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); } } // of while diff --git a/nestkernel/event_delivery_manager.h b/nestkernel/event_delivery_manager.h index 4f49645077..9df0cc5a3e 100644 --- a/nestkernel/event_delivery_manager.h +++ b/nestkernel/event_delivery_manager.h @@ -466,13 +466,12 @@ class EventDeliveryManager : public ManagerInterface PerThreadBoolIndicator gather_completed_checker_; -#ifdef TIMER_DETAILED // private stop watches for benchmarking purposes // (intended for internal core developers, not for use in the public API) - Stopwatch sw_collocate_spike_data_; - Stopwatch sw_communicate_spike_data_; - Stopwatch sw_communicate_target_data_; -#endif + // TODO JV: Make sure DETAILED_TIMERS is only ever used in stopwatch.h + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_collocate_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_target_data_; }; inline void diff --git a/nestkernel/kernel_manager.cpp b/nestkernel/kernel_manager.cpp index 2e407b249a..846123ed2b 100644 --- a/nestkernel/kernel_manager.cpp +++ b/nestkernel/kernel_manager.cpp @@ -22,6 +22,8 @@ #include "kernel_manager.h" +#include "stopwatch_impl.h" + nest::KernelManager* nest::KernelManager::kernel_manager_instance_ = nullptr; void @@ -91,6 +93,9 @@ nest::KernelManager::initialize() manager->initialize( /* adjust_number_of_threads_or_rng_only */ false ); } + sw_omp_synchronization_.reset(); + sw_mpi_synchronization_.reset(); + ++fingerprint_; initialized_ = true; FULL_LOGGING_ONLY( dump_.open( @@ -104,6 +109,9 @@ nest::KernelManager::prepare() { manager->prepare(); } + + sw_omp_synchronization_.reset(); + sw_mpi_synchronization_.reset(); } void @@ -162,6 +170,15 @@ nest::KernelManager::change_number_of_threads( size_t new_num_threads ) // is in place again, we can tell modules to re-register the components // they provide. module_manager.reinitialize_dynamic_modules(); + + // Prepare timers and set the number of threads for multi-threaded timers + kernel().simulation_manager.reset_timers_for_preparation(); + kernel().simulation_manager.reset_timers_for_dynamics(); + kernel().event_delivery_manager.reset_timers_for_preparation(); + kernel().event_delivery_manager.reset_timers_for_dynamics(); + + sw_omp_synchronization_.reset(); + sw_mpi_synchronization_.reset(); } void @@ -184,6 +201,9 @@ nest::KernelManager::get_status( DictionaryDatum& dict ) { manager->get_status( dict ); } + + sw_omp_synchronization_.output_timer( dict, names::time_omp_synchronization ); + sw_mpi_synchronization_.output_timer( dict, names::time_mpi_synchronization ); } void diff --git a/nestkernel/kernel_manager.h b/nestkernel/kernel_manager.h index 216981b2b7..e06af9a9af 100644 --- a/nestkernel/kernel_manager.h +++ b/nestkernel/kernel_manager.h @@ -287,12 +287,28 @@ class KernelManager MUSICManager music_manager; NodeManager node_manager; /**@}*/ + + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded >& + get_omp_synchronization_stopwatch() + { + return sw_omp_synchronization_; + } + + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly >& + get_mpi_synchronization_stopwatch() + { + return sw_mpi_synchronization_; + } + private: //! All managers, order determines initialization and finalization order (latter backwards) std::vector< ManagerInterface* > managers; bool initialized_; //!< true if the kernel is initialized std::ofstream dump_; //!< for FULL_LOGGING output + + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_mpi_synchronization_; }; KernelManager& kernel(); diff --git a/nestkernel/mpi_manager.cpp b/nestkernel/mpi_manager.cpp index b7a348322e..02bde93a52 100644 --- a/nestkernel/mpi_manager.cpp +++ b/nestkernel/mpi_manager.cpp @@ -26,7 +26,7 @@ #include // Includes from libnestutil: -#include "stopwatch.h" +#include "stopwatch_impl.h" // Includes from nestkernel: #include "kernel_manager.h" @@ -834,16 +834,16 @@ nest::MPIManager::time_communicate( int num_bytes, int samples ) std::vector< unsigned int > test_send_buffer( packet_length ); std::vector< unsigned int > test_recv_buffer( packet_length * get_num_processes() ); // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Allgather( &test_send_buffer[ 0 ], packet_length, MPI_UNSIGNED, &test_recv_buffer[ 0 ], packet_length, MPI_UNSIGNED, comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes using Allgatherv @@ -870,16 +870,16 @@ nest::MPIManager::time_communicatev( int num_bytes, int samples ) } // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { communicate_Allgatherv( test_send_buffer, test_recv_buffer, displacements, n_nodes ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes @@ -898,8 +898,8 @@ nest::MPIManager::time_communicate_offgrid( int num_bytes, int samples ) std::vector< OffGridSpike > test_send_buffer( packet_length ); std::vector< OffGridSpike > test_recv_buffer( packet_length * get_num_processes() ); // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Allgather( &test_send_buffer[ 0 ], @@ -911,8 +911,8 @@ nest::MPIManager::time_communicate_offgrid( int num_bytes, int samples ) comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes using Alltoall @@ -932,16 +932,16 @@ nest::MPIManager::time_communicate_alltoall( int num_bytes, int samples ) std::vector< unsigned int > test_send_buffer( total_packet_length ); std::vector< unsigned int > test_recv_buffer( total_packet_length ); // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Alltoall( &test_send_buffer[ 0 ], packet_length, MPI_UNSIGNED, &test_recv_buffer[ 0 ], packet_length, MPI_UNSIGNED, comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } // average communication time for a packet size of num_bytes using Alltoallv @@ -969,8 +969,8 @@ nest::MPIManager::time_communicate_alltoallv( int num_bytes, int samples ) } // start time measurement here - Stopwatch foo; - foo.start(); + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > stopwatch; + stopwatch.start(); for ( int i = 0; i < samples; ++i ) { MPI_Alltoallv( &test_send_buffer[ 0 ], @@ -984,8 +984,8 @@ nest::MPIManager::time_communicate_alltoallv( int num_bytes, int samples ) comm ); } // finish time measurement here - foo.stop(); - return foo.elapsed() / samples; + stopwatch.stop(); + return stopwatch.elapsed() / samples; } #else /* #ifdef HAVE_MPI */ diff --git a/nestkernel/nest_names.cpp b/nestkernel/nest_names.cpp index c897a9e6cf..4d10766098 100644 --- a/nestkernel/nest_names.cpp +++ b/nestkernel/nest_names.cpp @@ -606,6 +606,8 @@ const Name time_deliver_spike_data( "time_deliver_spike_data" ); const Name time_gather_secondary_data( "time_gather_secondary_data" ); const Name time_gather_spike_data( "time_gather_spike_data" ); const Name time_gather_target_data( "time_gather_target_data" ); +const Name time_omp_synchronization( "time_omp_synchronization" ); +const Name time_mpi_synchronization( "time_mpi_synchronization" ); const Name time_in_steps( "time_in_steps" ); const Name time_simulate( "time_simulate" ); const Name time_update( "time_update" ); diff --git a/nestkernel/nest_names.h b/nestkernel/nest_names.h index b6d964e04c..701bcb538b 100644 --- a/nestkernel/nest_names.h +++ b/nestkernel/nest_names.h @@ -634,6 +634,8 @@ extern const Name time_deliver_spike_data; extern const Name time_gather_secondary_data; extern const Name time_gather_spike_data; extern const Name time_gather_target_data; +extern const Name time_omp_synchronization; +extern const Name time_mpi_synchronization; extern const Name time_in_steps; extern const Name time_simulate; extern const Name time_update; diff --git a/nestkernel/node_manager.cpp b/nestkernel/node_manager.cpp index 5e984ca7fa..7708a787cb 100644 --- a/nestkernel/node_manager.cpp +++ b/nestkernel/node_manager.cpp @@ -31,8 +31,6 @@ #include "logging.h" // Includes from nestkernel: -#include "event_delivery_manager.h" -#include "genericmodel.h" #include "kernel_manager.h" #include "model.h" #include "model_manager_impl.h" @@ -788,7 +786,7 @@ void NodeManager::get_status( DictionaryDatum& d ) { def< long >( d, names::network_size, size() ); - def< double >( d, names::time_construction_create, sw_construction_create_.elapsed() ); + sw_construction_create_.output_timer( d, names::time_construction_create ); } void diff --git a/nestkernel/node_manager.h b/nestkernel/node_manager.h index 86044911b3..f5dd76093b 100644 --- a/nestkernel/node_manager.h +++ b/nestkernel/node_manager.h @@ -356,7 +356,7 @@ class NodeManager : public ManagerInterface std::vector< std::shared_ptr< WrappedThreadException > > exceptions_raised_; // private stop watch for benchmarking purposes - Stopwatch sw_construction_create_; + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_construction_create_; }; inline size_t diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index 267f753298..c7f158261d 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -24,6 +24,7 @@ // Includes from nestkernel #include "kernel_manager.h" +#include "stopwatch_impl.h" namespace nest { @@ -64,6 +65,7 @@ PerThreadBoolIndicator::initialize( const size_t num_threads, const bool status bool PerThreadBoolIndicator::all_false() const { + kernel().get_omp_synchronization_stopwatch().start(); // We need two barriers here to ensure that no thread can continue and change the result // before all threads have determined the result. #pragma omp barrier @@ -71,33 +73,43 @@ PerThreadBoolIndicator::all_false() const // before all threads have determined the result. bool ret = ( are_true_ == 0 ); #pragma omp barrier + + kernel().get_omp_synchronization_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::all_true() const { + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ == size_ ); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::any_false() const { + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ < size_ ); #pragma omp barrier + + kernel().get_omp_synchronization_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::any_true() const { + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ > 0 ); #pragma omp barrier + + kernel().get_omp_synchronization_stopwatch().stop(); return ret; } diff --git a/nestkernel/simulation_manager.cpp b/nestkernel/simulation_manager.cpp index 9e0da05602..95cc0a3a18 100644 --- a/nestkernel/simulation_manager.cpp +++ b/nestkernel/simulation_manager.cpp @@ -37,6 +37,7 @@ #include "connection_manager_impl.h" #include "event_delivery_manager.h" #include "kernel_manager.h" +#include "stopwatch_impl.h" // Includes from sli: #include "dictutils.h" @@ -117,22 +118,18 @@ void nest::SimulationManager::reset_timers_for_preparation() { sw_communicate_prepare_.reset(); -#ifdef TIMER_DETAILED sw_gather_target_data_.reset(); -#endif } void nest::SimulationManager::reset_timers_for_dynamics() { sw_simulate_.reset(); -#ifdef TIMER_DETAILED sw_gather_spike_data_.reset(); sw_gather_secondary_data_.reset(); sw_update_.reset(); sw_deliver_spike_data_.reset(); sw_deliver_secondary_data_.reset(); -#endif } void @@ -477,16 +474,14 @@ nest::SimulationManager::get_status( DictionaryDatum& d ) def< double >( d, names::min_update_time, min_update_time_ ); def< double >( d, names::max_update_time, max_update_time_ ); - def< double >( d, names::time_simulate, sw_simulate_.elapsed() ); - def< double >( d, names::time_communicate_prepare, sw_communicate_prepare_.elapsed() ); -#ifdef TIMER_DETAILED - def< double >( d, names::time_gather_spike_data, sw_gather_spike_data_.elapsed() ); - def< double >( d, names::time_gather_secondary_data, sw_gather_secondary_data_.elapsed() ); - def< double >( d, names::time_update, sw_update_.elapsed() ); - def< double >( d, names::time_gather_target_data, sw_gather_target_data_.elapsed() ); - def< double >( d, names::time_deliver_spike_data, sw_deliver_spike_data_.elapsed() ); - def< double >( d, names::time_deliver_secondary_data, sw_deliver_secondary_data_.elapsed() ); -#endif + sw_simulate_.output_timer( d, names::time_simulate ); + sw_communicate_prepare_.output_timer( d, names::time_communicate_prepare ); + sw_gather_spike_data_.output_timer( d, names::time_gather_spike_data ); + sw_gather_secondary_data_.output_timer( d, names::time_gather_secondary_data ); + sw_update_.output_timer( d, names::time_update ); + sw_gather_target_data_.output_timer( d, names::time_gather_target_data ); + sw_deliver_spike_data_.output_timer( d, names::time_deliver_spike_data ); + sw_deliver_secondary_data_.output_timer( d, names::time_deliver_secondary_data ); def< double >( d, names::eprop_update_interval, eprop_update_interval_ ); def< double >( d, names::eprop_learning_window, eprop_learning_window_ ); def< bool >( d, names::eprop_reset_neurons_on_update, eprop_reset_neurons_on_update_ ); @@ -738,17 +733,21 @@ nest::SimulationManager::call_update_() void nest::SimulationManager::update_connection_infrastructure( const size_t tid ) { + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier - if ( tid == 0 ) - { - sw_communicate_prepare_.start(); - } + kernel().get_omp_synchronization_stopwatch().stop(); + + sw_communicate_prepare_.start(); - kernel().connection_manager.restructure_connection_tables( tid ); kernel().connection_manager.sort_connections( tid ); + sw_gather_target_data_.start(); + kernel().connection_manager.restructure_connection_tables( tid ); kernel().connection_manager.collect_compressed_spike_data( tid ); + sw_gather_target_data_.stop(); + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier // wait for all threads to finish sorting + kernel().get_omp_synchronization_stopwatch().stop(); #pragma omp single { @@ -765,7 +764,9 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) { #pragma omp barrier kernel().connection_manager.compute_compressed_secondary_recv_buffer_positions( tid ); + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); #pragma omp single { kernel().mpi_manager.communicate_recv_counts_secondary_events(); @@ -773,12 +774,7 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) } } -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_gather_target_data_.start(); - } -#endif + sw_gather_target_data_.start(); // communicate connection information from postsynaptic to // presynaptic side @@ -796,32 +792,23 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) kernel().event_delivery_manager.gather_target_data( tid ); } -#ifdef TIMER_DETAILED -#pragma omp barrier - if ( tid == 0 ) - { - sw_gather_target_data_.stop(); - } -#endif + sw_gather_target_data_.stop(); if ( kernel().connection_manager.secondary_connections_exist() ) { kernel().connection_manager.compress_secondary_send_buffer_pos( tid ); } + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); #pragma omp single { kernel().connection_manager.clear_compressed_spike_data_map(); kernel().node_manager.set_have_nodes_changed( false ); kernel().connection_manager.unset_connections_have_changed(); } - -#pragma omp barrier - if ( tid == 0 ) - { - sw_communicate_prepare_.stop(); - } + sw_communicate_prepare_.stop(); } bool @@ -871,48 +858,30 @@ nest::SimulationManager::update_() // from the time step preceding the arrival of the spike triggering the weight update. if ( kernel().connection_manager.secondary_connections_exist() ) { -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_secondary_data_.start(); - } -#endif + sw_deliver_secondary_data_.start(); kernel().event_delivery_manager.deliver_secondary_events( tid, false ); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_secondary_data_.stop(); - } -#endif + sw_deliver_secondary_data_.stop(); } if ( kernel().connection_manager.has_primary_connections() ) { -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_spike_data_.start(); - } -#endif + sw_deliver_spike_data_.start(); // Deliver spikes from receive buffer to ring buffers. kernel().event_delivery_manager.deliver_events( tid ); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_deliver_spike_data_.stop(); - } -#endif + sw_deliver_spike_data_.stop(); } #ifdef HAVE_MUSIC -// advance the time of music by one step (min_delay * h) must -// be done after deliver_events_() since it calls -// music_event_out_proxy::handle(), which hands the spikes over to -// MUSIC *before* MUSIC time is advanced + // advance the time of music by one step (min_delay * h) must + // be done after deliver_events_() since it calls + // music_event_out_proxy::handle(), which hands the spikes over to + // MUSIC *before* MUSIC time is advanced -// wait until all threads are done -> synchronize + // wait until all threads are done -> synchronize + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block #pragma omp master @@ -971,9 +940,13 @@ nest::SimulationManager::update_() // add done value of thread p to done vector #pragma omp critical - done.push_back( done_p ); -// parallel section ends, wait until all threads are done -> synchronize + { + done.push_back( done_p ); + } + // parallel section ends, wait until all threads are done -> synchronize + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); // the following block is executed by a single thread // the other threads wait at the end of the block @@ -1033,7 +1006,9 @@ nest::SimulationManager::update_() Node* node = i->get_node(); node->update_synaptic_elements( Time( Time::step( clock_.get_steps() + from_step_ ) ).get_ms() ); } + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); #pragma omp single { kernel().sp_manager.update_structural_plasticity(); @@ -1055,14 +1030,7 @@ nest::SimulationManager::update_() } // of structural plasticity - -#ifdef TIMER_DETAILED -#pragma omp barrier - if ( tid == 0 ) - { - sw_update_.start(); - } -#endif + sw_update_.start(); const SparseNodeArray& thread_local_nodes = kernel().node_manager.get_local_nodes( tid ); for ( SparseNodeArray::const_iterator n = thread_local_nodes.begin(); n != thread_local_nodes.end(); ++n ) @@ -1074,46 +1042,41 @@ nest::SimulationManager::update_() } } -// parallel section ends, wait until all threads are done -> synchronize -#pragma omp barrier + sw_update_.stop(); -#ifdef TIMER_DETAILED - if ( tid == 0 ) - { - sw_update_.stop(); - } -#endif + // parallel section ends, wait until all threads are done -> synchronize + kernel().get_omp_synchronization_stopwatch().start(); +#pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); // the following block is executed by the master thread only -// the other threads are enforced to wait at the end of the block -#pragma omp master + // the other threads are enforced to wait at the end of the block + + // gather and deliver only at end of slice, i.e., end of min_delay step + if ( to_step_ == kernel().connection_manager.get_min_delay() ) { - // gather and deliver only at end of slice, i.e., end of min_delay step - if ( to_step_ == kernel().connection_manager.get_min_delay() ) + if ( kernel().connection_manager.has_primary_connections() ) { - if ( kernel().connection_manager.has_primary_connections() ) + sw_gather_spike_data_.start(); +#pragma omp master { -#ifdef TIMER_DETAILED - sw_gather_spike_data_.start(); -#endif - kernel().event_delivery_manager.gather_spike_data(); -#ifdef TIMER_DETAILED - sw_gather_spike_data_.stop(); -#endif } - if ( kernel().connection_manager.secondary_connections_exist() ) + sw_gather_spike_data_.stop(); + } + if ( kernel().connection_manager.secondary_connections_exist() ) + { + sw_gather_secondary_data_.start(); +#pragma omp master { -#ifdef TIMER_DETAILED - sw_gather_secondary_data_.start(); -#endif kernel().event_delivery_manager.gather_secondary_events( true ); -#ifdef TIMER_DETAILED - sw_gather_secondary_data_.stop(); -#endif } + sw_gather_secondary_data_.stop(); } + } +#pragma omp master + { advance_time_(); if ( print_time_ ) @@ -1136,8 +1099,10 @@ nest::SimulationManager::update_() // if block to avoid omp barrier if SIONLIB is not used #ifdef HAVE_SIONLIB kernel().io_manager.post_step_hook(); -// enforce synchronization after post-step activities of the recording backends + // enforce synchronization after post-step activities of the recording backends + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); #endif const double end_current_update = sw_simulate_.elapsed(); diff --git a/nestkernel/simulation_manager.h b/nestkernel/simulation_manager.h index 83a37cac31..c701dd0f74 100644 --- a/nestkernel/simulation_manager.h +++ b/nestkernel/simulation_manager.h @@ -228,17 +228,15 @@ class SimulationManager : public ManagerInterface double max_update_time_; //!< longest update time seen so far (seconds) // private stop watches for benchmarking purposes - Stopwatch sw_simulate_; - Stopwatch sw_communicate_prepare_; -#ifdef TIMER_DETAILED + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_simulate_; + Stopwatch< StopwatchVerbosity::Normal, StopwatchType::Threaded > sw_communicate_prepare_; // intended for internal core developers, not for use in the public API - Stopwatch sw_gather_spike_data_; - Stopwatch sw_gather_secondary_data_; - Stopwatch sw_update_; - Stopwatch sw_gather_target_data_; - Stopwatch sw_deliver_spike_data_; - Stopwatch sw_deliver_secondary_data_; -#endif + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_secondary_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_update_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_target_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_deliver_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_deliver_secondary_data_; double eprop_update_interval_; double eprop_learning_window_; diff --git a/nestkernel/source_table.cpp b/nestkernel/source_table.cpp index b5e7c49c73..0e3d4bc072 100644 --- a/nestkernel/source_table.cpp +++ b/nestkernel/source_table.cpp @@ -29,6 +29,7 @@ #include "kernel_manager.h" #include "mpi_manager_impl.h" #include "source_table.h" +#include "stopwatch_impl.h" #include "vp_manager_impl.h" nest::SourceTable::SourceTable() @@ -229,7 +230,9 @@ nest::SourceTable::compute_buffer_pos_for_unique_secondary_sources( const size_t } } } + kernel().get_omp_synchronization_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_stopwatch().stop(); #pragma omp single { diff --git a/nestkernel/stimulation_backend_mpi.cpp b/nestkernel/stimulation_backend_mpi.cpp index 189207f08f..8f908d24ce 100644 --- a/nestkernel/stimulation_backend_mpi.cpp +++ b/nestkernel/stimulation_backend_mpi.cpp @@ -159,7 +159,7 @@ nest::StimulationBackendMPI::prepare() } // Add the id of device of the other thread in the vector_id_device and update the count of all device - for ( int id_thread = 0; id_thread < kernel().vp_manager.get_num_threads(); id_thread++ ) + for ( size_t id_thread = 0; id_thread < kernel().vp_manager.get_num_threads(); id_thread++ ) { // don't do it again for the master thread if ( id_thread != thread_id_master ) diff --git a/libnestutil/stopwatch.h b/nestkernel/stopwatch.h similarity index 52% rename from libnestutil/stopwatch.h rename to nestkernel/stopwatch.h index d30474faa2..ef691573af 100644 --- a/libnestutil/stopwatch.h +++ b/nestkernel/stopwatch.h @@ -27,14 +27,23 @@ #include // C++ includes: +#include "arraydatum.h" +#include "dictdatum.h" +#include "dictutils.h" +#include #include #include +#include namespace nest { +constexpr bool use_detailed_timers = TIMER_DETAILED; +constexpr bool use_threaded_timers = THREADED_TIMERS; + +// TODO JV: Update docs /*********************************************************************** - * Stopwatch * + * StopwatchBase * * Accumulates time between start and stop, and provides * * the elapsed time with different time units. * * * @@ -43,7 +52,7 @@ namespace nest * - Let each thread have its own stopwatch. * * * * Usage example: * - * Stopwatch x; * + * StopwatchBase x; * * x.start(); * * // ... do computations for 15.34 sec * * x.stop(); // only pauses stopwatch * @@ -59,32 +68,29 @@ namespace nest * // ^ intermediate timing without stopping the stopwatch * * // ... more computations 1.7643 min * * x.stop(); * - * x.print("Time needed ", Stopwatch::MINUTES, std::cerr); * + * x.print("Time needed ", StopwatchBase::MINUTES, std::cerr); * * // > Time needed 1,8593 min. (on cerr) * * // other units and output streams possible * ***********************************************************************/ -class Stopwatch +class StopwatchBase { public: typedef size_t timestamp_t; - typedef size_t timeunit_t; - enum + enum timeunit_t : size_t { - MICROSEC = static_cast< timeunit_t >( 1 ), - MILLISEC = MICROSEC * static_cast< timeunit_t >( 1000 ), - SECONDS = MILLISEC * static_cast< timeunit_t >( 1000 ), - MINUTES = SECONDS * static_cast< timeunit_t >( 60 ), - HOURS = MINUTES * static_cast< timeunit_t >( 60 ), - DAYS = HOURS * static_cast< timeunit_t >( 24 ) + MICROSEC = 1, + MILLISEC = MICROSEC * 1000, + SECONDS = MILLISEC * 1000, + MINUTES = SECONDS * 60, + HOURS = MINUTES * 60, + DAYS = HOURS * 24 }; - static bool correct_timeunit( timeunit_t t ); - /** * Creates a stopwatch that is not running. */ - Stopwatch() + StopwatchBase() { reset(); } @@ -121,7 +127,7 @@ class Stopwatch * runtime is added. If you want only the last measurement, you * have to reset the timer, before stating the measurement. * Does not change the running state. - * In contrast to Stopwatch::elapsed(), only the timestamp is returned, + * In contrast to StopwatchBase::elapsed(), only the timestamp is returned, * that is the number if microseconds as an integer. */ timestamp_t elapsed_timestamp() const; @@ -140,7 +146,7 @@ class Stopwatch * Convenient method for writing time in seconds * to some ostream. */ - friend std::ostream& operator<<( std::ostream& os, const Stopwatch& stopwatch ); + friend std::ostream& operator<<( std::ostream& os, const StopwatchBase& stopwatch ); private: #ifndef DISABLE_TIMING @@ -155,14 +161,8 @@ class Stopwatch static timestamp_t get_timestamp(); }; -inline bool -Stopwatch::correct_timeunit( timeunit_t t ) -{ - return t == MICROSEC or t == MILLISEC or t == SECONDS or t == MINUTES or t == HOURS or t == DAYS; -} - inline void -nest::Stopwatch::start() +nest::StopwatchBase::start() { #ifndef DISABLE_TIMING if ( not isRunning() ) @@ -175,7 +175,7 @@ nest::Stopwatch::start() } inline void -nest::Stopwatch::stop() +nest::StopwatchBase::stop() { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -187,7 +187,7 @@ nest::Stopwatch::stop() } inline bool -nest::Stopwatch::isRunning() const +nest::StopwatchBase::isRunning() const { #ifndef DISABLE_TIMING return _running; @@ -197,18 +197,17 @@ nest::Stopwatch::isRunning() const } inline double -nest::Stopwatch::elapsed( timeunit_t timeunit ) const +nest::StopwatchBase::elapsed( timeunit_t timeunit ) const { #ifndef DISABLE_TIMING - assert( correct_timeunit( timeunit ) ); return 1.0 * elapsed_timestamp() / timeunit; #else return 0.0; #endif } -inline nest::Stopwatch::timestamp_t -nest::Stopwatch::elapsed_timestamp() const +inline nest::StopwatchBase::timestamp_t +nest::StopwatchBase::elapsed_timestamp() const { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -227,7 +226,7 @@ nest::Stopwatch::elapsed_timestamp() const } inline void -nest::Stopwatch::reset() +nest::StopwatchBase::reset() { #ifndef DISABLE_TIMING _beg = 0; // invariant: _end >= _beg @@ -238,10 +237,9 @@ nest::Stopwatch::reset() } inline void -nest::Stopwatch::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const +nest::StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const { #ifndef DISABLE_TIMING - assert( correct_timeunit( timeunit ) ); double e = elapsed( timeunit ); os << msg << e; switch ( timeunit ) @@ -273,18 +271,235 @@ nest::Stopwatch::print( const char* msg, timeunit_t timeunit, std::ostream& os ) #endif } -inline nest::Stopwatch::timestamp_t -nest::Stopwatch::get_timestamp() +inline nest::StopwatchBase::timestamp_t +nest::StopwatchBase::get_timestamp() { - // works with: - // * hambach (Linux 2.6.32 x86_64) - // * JuQueen (BG/Q) - // * MacOS 10.9 struct timeval now; gettimeofday( &now, static_cast< struct timezone* >( nullptr ) ); - return ( nest::Stopwatch::timestamp_t ) now.tv_usec - + ( nest::Stopwatch::timestamp_t ) now.tv_sec * nest::Stopwatch::SECONDS; + return ( nest::StopwatchBase::timestamp_t ) now.tv_usec + + ( nest::StopwatchBase::timestamp_t ) now.tv_sec * nest::StopwatchBase::SECONDS; } +inline std::ostream& +operator<<( std::ostream& os, const StopwatchBase& stopwatch ) +{ + stopwatch.print( "", StopwatchBase::timeunit_t::SECONDS, os ); + return os; +} + +enum StopwatchVerbosity +{ + Normal, // +class Stopwatch +{ +public: + void + output_timer( DictionaryDatum& d, const Name& name ) + { + def< double >( d, name, timer_.elapsed() ); + } + + void + start() + { +#pragma omp master + timer_.start(); + } + + void + stop() + { +#pragma omp master + timer_.stop(); + } + + bool + isRunning() const + { + bool isRunning = false; +#pragma omp master + { + isRunning = timer_.isRunning(); + }; + return isRunning; + } + + double + elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const + { + double elapsed = 0.; +#pragma omp master + { + elapsed = timer_.elapsed( timeunit ); + }; + return elapsed; + } + + StopwatchBase::timestamp_t + elapsed_timestamp() const + { + StopwatchBase::timestamp_t elapsed = 0; +#pragma omp master + { + elapsed = timer_.elapsed_timestamp(); + }; + return elapsed; + } + + void + reset() + { +#pragma omp master + timer_.reset(); + } + + void + print( const char* msg = "", + StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, + std::ostream& os = std::cout ) const + { +#pragma omp master + timer_.print( msg, timeunit, os ); + } + +private: + StopwatchBase timer_; +}; + +/** If the user deactivated detailed timers, Stopwatch instance with the detailed flag will become an empty Stopwatch, + * which will be safely ignored by the compiler, as if the instance was not declared (e.g. as a member). + */ +template <> +class Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly, std::enable_if< not use_detailed_timers > > +{ + void + start() + { + } + void + stop() + { + } + bool + isRunning() const + { + return false; + } + double + elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const + { + return 0; + } + StopwatchBase::timestamp_t + elapsed_timestamp() const + { + return 0; + } + void + reset() + { + } + void + print( const char* msg = "", + StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, + std::ostream& os = std::cout ) const + { + } +}; + +/** Only provide these template specializations if threaded timers are activated and always fall back to the base + * template specialization if not. Deactivate threaded detailed timers if all detailed timers are deactivated. + */ +template < StopwatchVerbosity detailed_timer > +class Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Detailed and not use_detailed_timers ) > > +{ + void + start() + { + } + void + stop() + { + } + bool + isRunning() const + { + return false; + } + double + elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const + { + return 0; + } + StopwatchBase::timestamp_t + elapsed_timestamp() const + { + return 0; + } + void + reset() + { + } + void + print( const char* msg = "", + StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, + std::ostream& os = std::cout ) const + { + } +}; +template < StopwatchVerbosity detailed_timer > +class Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > > +{ +public: + void + output_timer( DictionaryDatum& d, const Name& name ) + { + std::vector< double > times( timers_.size() ); + std::transform( + timers_.begin(), timers_.end(), times.begin(), []( const StopwatchBase& timer ) { return timer.elapsed(); } ); + def< ArrayDatum >( d, name, ArrayDatum( times ) ); + } + + void start(); + + void stop(); + + bool isRunning() const; + + double elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const; + + StopwatchBase::timestamp_t elapsed_timestamp() const; + + void reset(); + + void print( const char* msg = "", + StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, + std::ostream& os = std::cout ) const; + +private: + std::vector< StopwatchBase > timers_; +}; + } /* namespace timer */ #endif /* STOPWATCH_H */ diff --git a/nestkernel/stopwatch_impl.h b/nestkernel/stopwatch_impl.h new file mode 100644 index 0000000000..0b6a873a58 --- /dev/null +++ b/nestkernel/stopwatch_impl.h @@ -0,0 +1,106 @@ +/* + * stopwatch_impl.h + * + * This file is part of NEST. + * + * Copyright (C) 2004 The NEST Initiative + * + * NEST is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * NEST is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NEST. If not, see . + * + */ + +#include "kernel_manager.h" +#include "stopwatch.h" + +namespace nest +{ + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::start() +{ + timers_[ kernel().vp_manager.get_thread_id() ].start(); +} + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::stop() +{ + timers_[ kernel().vp_manager.get_thread_id() ].stop(); +} + +template < StopwatchVerbosity detailed_timer > +bool +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::isRunning() const +{ + return timers_[ kernel().vp_manager.get_thread_id() ].isRunning(); +} + +template < StopwatchVerbosity detailed_timer > +double +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::elapsed( StopwatchBase::timeunit_t + timeunit ) const +{ + return timers_[ kernel().vp_manager.get_thread_id() ].elapsed( timeunit ); +} + +template < StopwatchVerbosity detailed_timer > +StopwatchBase::timestamp_t +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::elapsed_timestamp() const +{ + return timers_[ kernel().vp_manager.get_thread_id() ].elapsed_timestamp(); +} + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::print( const char* msg, + StopwatchBase::timeunit_t timeunit, + std::ostream& os ) const +{ + timers_[ kernel().vp_manager.get_thread_id() ].print( msg, timeunit, os ); +} + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::reset() +{ + const size_t num_threads = kernel().vp_manager.get_num_threads(); + timers_.resize( num_threads ); + for ( size_t i = 0; i < num_threads; ++i ) + { + timers_[ i ].reset(); + } +} +} From c7228bd05a0d8de3c89809c59f47384efd34dffc Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Thu, 12 Dec 2024 18:02:35 +0100 Subject: [PATCH 03/11] Minor changes --- CMakeLists.txt | 2 + cmake/ProcessOptions.cmake | 7 + .../guidelines/coding_guidelines_cpp.rst | 4 +- doc/htmldoc/installation/cmake_options.rst | 3 + doc/htmldoc/nest_behavior/built-in_timers.rst | 4 + nestkernel/connection_manager.cpp | 4 +- nestkernel/event_delivery_manager.cpp | 34 ++-- nestkernel/event_delivery_manager.h | 1 - nestkernel/kernel_manager.cpp | 12 +- nestkernel/kernel_manager.h | 15 +- nestkernel/nest_names.cpp | 3 +- nestkernel/nest_names.h | 3 +- nestkernel/per_thread_bool_indicator.cpp | 16 +- nestkernel/simulation_manager.cpp | 66 ++++--- nestkernel/simulation_manager.h | 4 +- nestkernel/source_table.cpp | 4 +- nestkernel/stopwatch.h | 167 +++++++----------- nestkernel/stopwatch_impl.h | 22 --- 18 files changed, 164 insertions(+), 207 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 542f233df2..137a952507 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,7 @@ set( with-models OFF CACHE STRING "The models to include as a semicolon-separate set( tics_per_ms "1000.0" CACHE STRING "Specify elementary unit of time [default=1000 tics per ms]." ) set( tics_per_step "100" CACHE STRING "Specify resolution [default=100 tics per step]." ) set( with-detailed-timers OFF CACHE STRING "Build with detailed internal time measurements [default=OFF]. Detailed timers can affect the performance." ) +set( with-mpi-sync-timer OFF CACHE STRING "Build with mpi synchronization barrier and timer [default=OFF]. Can affect the performance." ) set( with-threaded-timers ON CACHE STRING "Build with one internal timer per thread [default=ON]. Multi-threaded timers can affect the performance." ) set( target-bits-split "standard" CACHE STRING "Split of the 64-bit target neuron identifier type [default='standard']. 'standard' is recommended for most users. If running on more than 262144 MPI processes or more than 512 threads, change to 'hpc'." ) @@ -145,6 +146,7 @@ nest_process_with_openmp() nest_process_with_mpi() nest_process_with_detailed_timers() nest_process_with_threaded_timers() +nest_process_with_mpi_sync_timer() nest_process_with_libneurosim() nest_process_with_music() nest_process_with_sionlib() diff --git a/cmake/ProcessOptions.cmake b/cmake/ProcessOptions.cmake index c5170a3940..fad5dfdc1f 100644 --- a/cmake/ProcessOptions.cmake +++ b/cmake/ProcessOptions.cmake @@ -469,6 +469,13 @@ function( NEST_PROCESS_WITH_THREADED_TIMERS ) endif () endfunction() +function( NEST_PROCESS_WITH_MPI_SYNC_TIMER ) + set( MPI_SYNC_TIMER OFF PARENT_SCOPE ) + if ( ${with-mpi-sync-timer} STREQUAL "ON" ) + set( MPI_SYNC_TIMER ON PARENT_SCOPE ) + endif () +endfunction() + function( NEST_PROCESS_WITH_LIBNEUROSIM ) # Find libneurosim set( HAVE_LIBNEUROSIM OFF PARENT_SCOPE ) diff --git a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst index 68f2acd1b5..f6dceba360 100644 --- a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst +++ b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst @@ -556,7 +556,7 @@ For example, the ``stopwatch.h`` file could look like: } inline nest::Stopwatch::timestamp_t - nest::Stopwatch::elapsed_timestamp() const + nest::Stopwatch::elapsed_us() const { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -622,7 +622,7 @@ For example, the ``stopwatch.h`` file could look like: } inline nest::Stopwatch::timestamp_t - nest::Stopwatch::get_timestamp() + nest::Stopwatch::get_current_time() { // works with: // * hambach (Linux 2.6.32 x86_64) diff --git a/doc/htmldoc/installation/cmake_options.rst b/doc/htmldoc/installation/cmake_options.rst index e118910d01..323d3c6127 100644 --- a/doc/htmldoc/installation/cmake_options.rst +++ b/doc/htmldoc/installation/cmake_options.rst @@ -202,6 +202,9 @@ NEST properties +-----------------------------------------------+----------------------------------------------------------------+ | ``-Dwith-detailed-timers=[OFF|ON]`` | Build with detailed internal time measurements [default=OFF]. | | | Detailed timers can affect the performance. | ++----------------------------------------------------------------------------------------------------------------+ +| ``-Dwith-mpi-sync-timer=[OFF|ON]`` | Build with mpi synchronization barrier and timer [default=OFF].| +| | Can affect the performance. | +-----------------------------------------------+----------------------------------------------------------------+ | ``-Dtarget-bits-split=['standard'|'hpc']`` | Split of the 64-bit target neuron identifier type | | | [default='standard']. 'standard' is recommended for most users.| diff --git a/doc/htmldoc/nest_behavior/built-in_timers.rst b/doc/htmldoc/nest_behavior/built-in_timers.rst index 97ab79e383..b57e0bda31 100644 --- a/doc/htmldoc/nest_behavior/built-in_timers.rst +++ b/doc/htmldoc/nest_behavior/built-in_timers.rst @@ -107,3 +107,7 @@ available as kernel attributes: | |buffers of the corresponding | | | |postsynaptic neurons | | +--------------------------------+----------------------------------+----------------------------------+ + + +with-mpi-sync-timer +with-threaded-timers diff --git a/nestkernel/connection_manager.cpp b/nestkernel/connection_manager.cpp index b67e2cb5df..4001867308 100644 --- a/nestkernel/connection_manager.cpp +++ b/nestkernel/connection_manager.cpp @@ -1801,9 +1801,9 @@ nest::ConnectionManager::collect_compressed_spike_data( const size_t tid ) } // of omp single; implicit barrier source_table_.collect_compressible_sources( tid ); - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { source_table_.fill_compressed_spike_data( compressed_spike_data_ ); diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index bd17cd1aae..383903f56f 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -416,12 +416,12 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer set_end_marker_( send_buffer_position, send_buffer, local_max_spikes_per_rank ); sw_collocate_spike_data_.stop(); -#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) + sw_communicate_spike_data_.start(); +#ifdef MPI_SYNC_TIMER kernel().get_mpi_synchronization_stopwatch().start(); kernel().mpi_manager.synchronize(); kernel().get_mpi_synchronization_stopwatch().stop(); #endif - sw_communicate_spike_data_.start(); // Given that we templatize by plain vs offgrid, this if should not be necessary, but ... if ( off_grid_spiking_ ) @@ -797,9 +797,9 @@ EventDeliveryManager::gather_target_data( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; (no barrier) - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); kernel().connection_manager.restore_source_table_entry_point( tid ); @@ -814,18 +814,13 @@ EventDeliveryManager::gather_target_data( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } kernel().connection_manager.save_source_table_entry_point( tid ); - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); kernel().connection_manager.clean_source_table( tid ); #pragma omp master { -#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) - kernel().get_mpi_synchronization_stopwatch().start(); - kernel().mpi_manager.synchronize(); - kernel().get_mpi_synchronization_stopwatch().stop(); -#endif sw_communicate_target_data_.start(); kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); sw_communicate_target_data_.stop(); @@ -874,9 +869,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; no barrier - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); TargetSendBufferPosition send_buffer_position( assigned_ranks, kernel().mpi_manager.get_send_recv_count_target_data_per_rank() ); @@ -891,17 +886,12 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp master { -#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) - kernel().get_mpi_synchronization_stopwatch().start(); - kernel().mpi_manager.synchronize(); - kernel().get_mpi_synchronization_stopwatch().stop(); -#endif sw_communicate_target_data_.start(); kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); sw_communicate_target_data_.stop(); @@ -921,9 +911,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) { buffer_size_target_data_has_changed_ = kernel().mpi_manager.increase_buffer_size_target_data(); } // of omp master (no barrier) - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); } } // of while diff --git a/nestkernel/event_delivery_manager.h b/nestkernel/event_delivery_manager.h index 9df0cc5a3e..59e3681646 100644 --- a/nestkernel/event_delivery_manager.h +++ b/nestkernel/event_delivery_manager.h @@ -468,7 +468,6 @@ class EventDeliveryManager : public ManagerInterface // private stop watches for benchmarking purposes // (intended for internal core developers, not for use in the public API) - // TODO JV: Make sure DETAILED_TIMERS is only ever used in stopwatch.h Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_collocate_spike_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_spike_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_target_data_; diff --git a/nestkernel/kernel_manager.cpp b/nestkernel/kernel_manager.cpp index 846123ed2b..79009cc39d 100644 --- a/nestkernel/kernel_manager.cpp +++ b/nestkernel/kernel_manager.cpp @@ -93,7 +93,8 @@ nest::KernelManager::initialize() manager->initialize( /* adjust_number_of_threads_or_rng_only */ false ); } - sw_omp_synchronization_.reset(); + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); sw_mpi_synchronization_.reset(); ++fingerprint_; @@ -110,7 +111,8 @@ nest::KernelManager::prepare() manager->prepare(); } - sw_omp_synchronization_.reset(); + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); sw_mpi_synchronization_.reset(); } @@ -177,7 +179,8 @@ nest::KernelManager::change_number_of_threads( size_t new_num_threads ) kernel().event_delivery_manager.reset_timers_for_preparation(); kernel().event_delivery_manager.reset_timers_for_dynamics(); - sw_omp_synchronization_.reset(); + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); sw_mpi_synchronization_.reset(); } @@ -202,7 +205,8 @@ nest::KernelManager::get_status( DictionaryDatum& dict ) manager->get_status( dict ); } - sw_omp_synchronization_.output_timer( dict, names::time_omp_synchronization ); + sw_omp_synchronization_construction_.output_timer( dict, names::time_omp_synchronization_construction ); + sw_omp_synchronization_simulation_.output_timer( dict, names::time_omp_synchronization_simulation ); sw_mpi_synchronization_.output_timer( dict, names::time_mpi_synchronization ); } diff --git a/nestkernel/kernel_manager.h b/nestkernel/kernel_manager.h index e06af9a9af..a055ba0e8b 100644 --- a/nestkernel/kernel_manager.h +++ b/nestkernel/kernel_manager.h @@ -288,10 +288,18 @@ class KernelManager NodeManager node_manager; /**@}*/ + //! Get the stopwatch to measure the time each thread is idle during network construction. Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded >& - get_omp_synchronization_stopwatch() + get_omp_synchronization_construction_stopwatch() { - return sw_omp_synchronization_; + return sw_omp_synchronization_construction_; + } + + //! Get the stopwatch to measure the time each thread is idle during simulation. + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded >& + get_omp_synchronization_simulation_stopwatch() + { + return sw_omp_synchronization_simulation_; } Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly >& @@ -307,7 +315,8 @@ class KernelManager bool initialized_; //!< true if the kernel is initialized std::ofstream dump_; //!< for FULL_LOGGING output - Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_construction_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_simulation_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_mpi_synchronization_; }; diff --git a/nestkernel/nest_names.cpp b/nestkernel/nest_names.cpp index 4d10766098..f2acfb8bf4 100644 --- a/nestkernel/nest_names.cpp +++ b/nestkernel/nest_names.cpp @@ -606,7 +606,8 @@ const Name time_deliver_spike_data( "time_deliver_spike_data" ); const Name time_gather_secondary_data( "time_gather_secondary_data" ); const Name time_gather_spike_data( "time_gather_spike_data" ); const Name time_gather_target_data( "time_gather_target_data" ); -const Name time_omp_synchronization( "time_omp_synchronization" ); +const Name time_omp_synchronization_construction( "time_omp_synchronization_construction" ); +const Name time_omp_synchronization_simulation( "time_omp_synchronization_simulation" ); const Name time_mpi_synchronization( "time_mpi_synchronization" ); const Name time_in_steps( "time_in_steps" ); const Name time_simulate( "time_simulate" ); diff --git a/nestkernel/nest_names.h b/nestkernel/nest_names.h index 701bcb538b..1acfddb70a 100644 --- a/nestkernel/nest_names.h +++ b/nestkernel/nest_names.h @@ -634,7 +634,8 @@ extern const Name time_deliver_spike_data; extern const Name time_gather_secondary_data; extern const Name time_gather_spike_data; extern const Name time_gather_target_data; -extern const Name time_omp_synchronization; +extern const Name time_omp_synchronization_construction; +extern const Name time_omp_synchronization_simulation; extern const Name time_mpi_synchronization; extern const Name time_in_steps; extern const Name time_simulate; diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index c7f158261d..4032a7320f 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -65,7 +65,7 @@ PerThreadBoolIndicator::initialize( const size_t num_threads, const bool status bool PerThreadBoolIndicator::all_false() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); // We need two barriers here to ensure that no thread can continue and change the result // before all threads have determined the result. #pragma omp barrier @@ -74,42 +74,42 @@ PerThreadBoolIndicator::all_false() const bool ret = ( are_true_ == 0 ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::all_true() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ == size_ ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::any_false() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ < size_ ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::any_true() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ > 0 ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } diff --git a/nestkernel/simulation_manager.cpp b/nestkernel/simulation_manager.cpp index 95cc0a3a18..a756104e55 100644 --- a/nestkernel/simulation_manager.cpp +++ b/nestkernel/simulation_manager.cpp @@ -733,9 +733,9 @@ nest::SimulationManager::call_update_() void nest::SimulationManager::update_connection_infrastructure( const size_t tid ) { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); sw_communicate_prepare_.start(); @@ -745,9 +745,9 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) kernel().connection_manager.collect_compressed_spike_data( tid ); sw_gather_target_data_.stop(); - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier // wait for all threads to finish sorting - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { @@ -762,11 +762,16 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) if ( kernel().connection_manager.secondary_connections_exist() ) { + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); + kernel().connection_manager.compute_compressed_secondary_recv_buffer_positions( tid ); - kernel().get_omp_synchronization_stopwatch().start(); + + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); + #pragma omp single { kernel().mpi_manager.communicate_recv_counts_secondary_events(); @@ -799,9 +804,9 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) kernel().connection_manager.compress_secondary_send_buffer_pos( tid ); } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { kernel().connection_manager.clear_compressed_spike_data_map(); @@ -879,9 +884,9 @@ nest::SimulationManager::update_() // MUSIC *before* MUSIC time is advanced // wait until all threads are done -> synchronize - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_run_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_run_stopwatch().stop(); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block #pragma omp master @@ -944,9 +949,9 @@ nest::SimulationManager::update_() done.push_back( done_p ); } // parallel section ends, wait until all threads are done -> synchronize - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by a single thread // the other threads wait at the end of the block @@ -1006,9 +1011,9 @@ nest::SimulationManager::update_() Node* node = i->get_node(); node->update_synaptic_elements( Time( Time::step( clock_.get_steps() + from_step_ ) ).get_ms() ); } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); #pragma omp single { kernel().sp_manager.update_structural_plasticity(); @@ -1045,38 +1050,31 @@ nest::SimulationManager::update_() sw_update_.stop(); // parallel section ends, wait until all threads are done -> synchronize - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block - - // gather and deliver only at end of slice, i.e., end of min_delay step - if ( to_step_ == kernel().connection_manager.get_min_delay() ) +#pragma omp master { - if ( kernel().connection_manager.has_primary_connections() ) + // gather and deliver only at end of slice, i.e., end of min_delay step + if ( to_step_ == kernel().connection_manager.get_min_delay() ) { - sw_gather_spike_data_.start(); -#pragma omp master + if ( kernel().connection_manager.has_primary_connections() ) { + sw_gather_spike_data_.start(); kernel().event_delivery_manager.gather_spike_data(); + sw_gather_spike_data_.stop(); } - sw_gather_spike_data_.stop(); - } - if ( kernel().connection_manager.secondary_connections_exist() ) - { - sw_gather_secondary_data_.start(); -#pragma omp master + if ( kernel().connection_manager.secondary_connections_exist() ) { + sw_gather_secondary_data_.start(); kernel().event_delivery_manager.gather_secondary_events( true ); + sw_gather_secondary_data_.stop(); } - sw_gather_secondary_data_.stop(); } - } -#pragma omp master - { advance_time_(); if ( print_time_ ) @@ -1100,9 +1098,9 @@ nest::SimulationManager::update_() #ifdef HAVE_SIONLIB kernel().io_manager.post_step_hook(); // enforce synchronization after post-step activities of the recording backends - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_run_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_run_stopwatch().stop(); #endif const double end_current_update = sw_simulate_.elapsed(); diff --git a/nestkernel/simulation_manager.h b/nestkernel/simulation_manager.h index c701dd0f74..f446aa9e86 100644 --- a/nestkernel/simulation_manager.h +++ b/nestkernel/simulation_manager.h @@ -231,8 +231,8 @@ class SimulationManager : public ManagerInterface Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_simulate_; Stopwatch< StopwatchVerbosity::Normal, StopwatchType::Threaded > sw_communicate_prepare_; // intended for internal core developers, not for use in the public API - Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_spike_data_; - Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_secondary_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_gather_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_gather_secondary_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_update_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_target_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_deliver_spike_data_; diff --git a/nestkernel/source_table.cpp b/nestkernel/source_table.cpp index 0e3d4bc072..d9fcd7af1c 100644 --- a/nestkernel/source_table.cpp +++ b/nestkernel/source_table.cpp @@ -230,9 +230,9 @@ nest::SourceTable::compute_buffer_pos_for_unique_secondary_sources( const size_t } } } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { diff --git a/nestkernel/stopwatch.h b/nestkernel/stopwatch.h index ef691573af..72bc35fdb7 100644 --- a/nestkernel/stopwatch.h +++ b/nestkernel/stopwatch.h @@ -32,46 +32,51 @@ #include "dictutils.h" #include #include +#include #include #include namespace nest { -constexpr bool use_detailed_timers = TIMER_DETAILED; -constexpr bool use_threaded_timers = THREADED_TIMERS; - -// TODO JV: Update docs -/*********************************************************************** - * StopwatchBase * - * Accumulates time between start and stop, and provides * - * the elapsed time with different time units. * - * * - * Partly inspired by com.google.common.base.Stopwatch.java * - * Not thread-safe: - Do not share stopwatches among threads. * - * - Let each thread have its own stopwatch. * - * * - * Usage example: * - * StopwatchBase x; * - * x.start(); * - * // ... do computations for 15.34 sec * - * x.stop(); // only pauses stopwatch * - * x.print("Time needed "); // > Time needed 15.34 sec. * - * x.start(); // resumes stopwatch * - * // ... next computations for 11.22 sec * - * x.stop(); * - * x.print("Time needed "); // > Time needed 26,56 sec. * - * x.reset(); // reset to default values * - * x.start(); // starts the stopwatch from 0 * - * // ... computation 5.7 sec * - * x.print("Time "); // > Time 5.7 sec. * - * // ^ intermediate timing without stopping the stopwatch * - * // ... more computations 1.7643 min * - * x.stop(); * - * x.print("Time needed ", StopwatchBase::MINUTES, std::cerr); * - * // > Time needed 1,8593 min. (on cerr) * - * // other units and output streams possible * - ***********************************************************************/ +// TODO JV: Set this variable via cmake instead +#ifdef TIMER_DETAILED +constexpr bool use_detailed_timers = true; +#else +constexpr bool use_detailed_timers = false; +#endif +#ifdef THREADED_TIMERS +constexpr bool use_threaded_timers = true; +#else +constexpr bool use_threaded_timers = false; +#endif + +/******************************************************************************** + * Stopwatch * + * Accumulates time between start and stop, and provides the elapsed time * + * with different time units. Either runs multi-threaded or only on master. * + * * + * Usage example: * + * Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > x; * + * x.start(); * + * // ... do computations for 15.34 sec * + * x.stop(); // only pauses stopwatch * + * x.print("Time needed "); // > Time needed 15.34 sec. * + * x.start(); // resumes stopwatch * + * // ... next computations for 11.22 sec * + * x.stop(); * + * x.print("Time needed "); // > Time needed 26,56 sec. * + * x.reset(); // reset to default values * + * x.start(); // starts the stopwatch from 0 * + * // ... computation 5.7 sec * + * x.print("Time "); // > Time 5.7 sec. * + * // ^ intermediate timing without stopping the stopwatch * + * // ... more computations 1.7643 min * + * x.stop(); * + * x.print("Time needed ", StopwatchBase::MINUTES, std::cerr); * + * // > Time needed 1,8593 min. (on cerr) * + * // other units and output streams possible * + ********************************************************************************/ class StopwatchBase { public: @@ -111,27 +116,13 @@ class StopwatchBase bool isRunning() const; /** - * Returns the time elapsed between the start and stop of the - * stopwatch. If it is running, it returns the time from start - * until now. If the stopwatch is run previously, the previous - * runtime is added. If you want only the last measurement, you - * have to reset the timer, before stating the measurement. + * Returns the time elapsed between the start and stop of the stopwatch in the given unit. If it is running, it + * returns the time from start until now. If the stopwatch is run previously, the previous runtime is added. If you + * want only the last measurement, you have to reset the timer, before stating the measurement. * Does not change the running state. */ double elapsed( timeunit_t timeunit = SECONDS ) const; - /** - * Returns the time elapsed between the start and stop of the - * stopwatch. If it is running, it returns the time from start - * until now. If the stopwatch is run previously, the previous - * runtime is added. If you want only the last measurement, you - * have to reset the timer, before stating the measurement. - * Does not change the running state. - * In contrast to StopwatchBase::elapsed(), only the timestamp is returned, - * that is the number if microseconds as an integer. - */ - timestamp_t elapsed_timestamp() const; - /** * Resets the stopwatch. */ @@ -158,36 +149,36 @@ class StopwatchBase /** * Returns current time in microseconds since EPOCH. */ - static timestamp_t get_timestamp(); + static size_t get_current_time(); }; inline void -nest::StopwatchBase::start() +StopwatchBase::start() { #ifndef DISABLE_TIMING if ( not isRunning() ) { - _prev_elapsed += _end - _beg; // store prev. time, if we resume - _end = _beg = get_timestamp(); // invariant: _end >= _beg - _running = true; // we start running + _prev_elapsed += _end - _beg; // store prev. time, if we resume + _end = _beg = get_current_time(); // invariant: _end >= _beg + _running = true; // we start running } #endif } inline void -nest::StopwatchBase::stop() +StopwatchBase::stop() { #ifndef DISABLE_TIMING if ( isRunning() ) { - _end = get_timestamp(); // invariant: _end >= _beg - _running = false; // we stopped running + _end = get_current_time(); // invariant: _end >= _beg + _running = false; // we stopped running } #endif } inline bool -nest::StopwatchBase::isRunning() const +StopwatchBase::isRunning() const { #ifndef DISABLE_TIMING return _running; @@ -197,36 +188,28 @@ nest::StopwatchBase::isRunning() const } inline double -nest::StopwatchBase::elapsed( timeunit_t timeunit ) const -{ -#ifndef DISABLE_TIMING - return 1.0 * elapsed_timestamp() / timeunit; -#else - return 0.0; -#endif -} - -inline nest::StopwatchBase::timestamp_t -nest::StopwatchBase::elapsed_timestamp() const +StopwatchBase::elapsed( timeunit_t timeunit ) const { #ifndef DISABLE_TIMING + size_t time_elapsed; if ( isRunning() ) { // get intermediate elapsed time; do not change _end, to be const - return get_timestamp() - _beg + _prev_elapsed; + time_elapsed = get_current_time() - _beg + _prev_elapsed; } else { // stopped before, get time of current measurement + last measurements - return _end - _beg + _prev_elapsed; + time_elapsed = _end - _beg + _prev_elapsed; } + return time_elapsed / timeunit; #else - return static_cast< timestamp_t >( 0 ); + return 0.; #endif } inline void -nest::StopwatchBase::reset() +StopwatchBase::reset() { #ifndef DISABLE_TIMING _beg = 0; // invariant: _end >= _beg @@ -237,7 +220,7 @@ nest::StopwatchBase::reset() } inline void -nest::StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const +StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const { #ifndef DISABLE_TIMING double e = elapsed( timeunit ); @@ -271,13 +254,14 @@ nest::StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& #endif } -inline nest::StopwatchBase::timestamp_t -nest::StopwatchBase::get_timestamp() +inline size_t +StopwatchBase::get_current_time() { + // We use a monotonic timer to make sure the stopwatch is not influenced by time jumps (e.g. summer/winter time). struct timeval now; gettimeofday( &now, static_cast< struct timezone* >( nullptr ) ); - return ( nest::StopwatchBase::timestamp_t ) now.tv_usec - + ( nest::StopwatchBase::timestamp_t ) now.tv_sec * nest::StopwatchBase::SECONDS; + return ( StopwatchBase::timestamp_t ) now.tv_usec + + ( StopwatchBase::timestamp_t ) now.tv_sec * StopwatchBase::SECONDS; } inline std::ostream& @@ -351,17 +335,6 @@ class Stopwatch return elapsed; } - StopwatchBase::timestamp_t - elapsed_timestamp() const - { - StopwatchBase::timestamp_t elapsed = 0; -#pragma omp master - { - elapsed = timer_.elapsed_timestamp(); - }; - return elapsed; - } - void reset() { @@ -406,11 +379,6 @@ class Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly, std::e { return 0; } - StopwatchBase::timestamp_t - elapsed_timestamp() const - { - return 0; - } void reset() { @@ -450,11 +418,6 @@ class Stopwatch< detailed_timer, { return 0; } - StopwatchBase::timestamp_t - elapsed_timestamp() const - { - return 0; - } void reset() { @@ -489,8 +452,6 @@ class Stopwatch< detailed_timer, double elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const; - StopwatchBase::timestamp_t elapsed_timestamp() const; - void reset(); void print( const char* msg = "", diff --git a/nestkernel/stopwatch_impl.h b/nestkernel/stopwatch_impl.h index 0b6a873a58..339a073643 100644 --- a/nestkernel/stopwatch_impl.h +++ b/nestkernel/stopwatch_impl.h @@ -67,28 +67,6 @@ Stopwatch< detailed_timer, return timers_[ kernel().vp_manager.get_thread_id() ].elapsed( timeunit ); } -template < StopwatchVerbosity detailed_timer > -StopwatchBase::timestamp_t -Stopwatch< detailed_timer, - StopwatchType::Threaded, - std::enable_if_t< use_threaded_timers - and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::elapsed_timestamp() const -{ - return timers_[ kernel().vp_manager.get_thread_id() ].elapsed_timestamp(); -} - -template < StopwatchVerbosity detailed_timer > -void -Stopwatch< detailed_timer, - StopwatchType::Threaded, - std::enable_if_t< use_threaded_timers - and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::print( const char* msg, - StopwatchBase::timeunit_t timeunit, - std::ostream& os ) const -{ - timers_[ kernel().vp_manager.get_thread_id() ].print( msg, timeunit, os ); -} - template < StopwatchVerbosity detailed_timer > void Stopwatch< detailed_timer, From 21b3f7e86ef0e5e37a20b907de188ae1544aafd2 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Fri, 13 Dec 2024 17:14:40 +0100 Subject: [PATCH 04/11] Fixed timers and added cpu timers --- nestkernel/event_delivery_manager.cpp | 8 +- nestkernel/kernel_manager.cpp | 9 +- nestkernel/nest_names.cpp | 16 +++ nestkernel/nest_names.h | 16 +++ nestkernel/node_manager.cpp | 2 +- nestkernel/simulation_manager.cpp | 17 +-- nestkernel/stopwatch.h | 161 ++++++++++++++++---------- nestkernel/stopwatch_impl.h | 42 +++++-- 8 files changed, 187 insertions(+), 84 deletions(-) diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index 383903f56f..d241828c0c 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -190,9 +190,11 @@ EventDeliveryManager::get_status( DictionaryDatum& dict ) ( *dict )[ names::spike_buffer_resize_log ] = log_events; send_recv_buffer_resize_log_.to_dict( log_events ); - sw_collocate_spike_data_.output_timer( dict, names::time_collocate_spike_data ); - sw_communicate_spike_data_.output_timer( dict, names::time_communicate_spike_data ); - sw_communicate_target_data_.output_timer( dict, names::time_communicate_target_data ); + sw_collocate_spike_data_.output_timer( dict, names::time_collocate_spike_data, names::time_collocate_spike_data_cpu ); + sw_communicate_spike_data_.output_timer( + dict, names::time_communicate_spike_data, names::time_collocate_spike_data_cpu ); + sw_communicate_target_data_.output_timer( + dict, names::time_communicate_target_data, names::time_communicate_target_data_cpu ); } void diff --git a/nestkernel/kernel_manager.cpp b/nestkernel/kernel_manager.cpp index 79009cc39d..cb37d4175b 100644 --- a/nestkernel/kernel_manager.cpp +++ b/nestkernel/kernel_manager.cpp @@ -111,7 +111,6 @@ nest::KernelManager::prepare() manager->prepare(); } - sw_omp_synchronization_construction_.reset(); sw_omp_synchronization_simulation_.reset(); sw_mpi_synchronization_.reset(); } @@ -205,9 +204,11 @@ nest::KernelManager::get_status( DictionaryDatum& dict ) manager->get_status( dict ); } - sw_omp_synchronization_construction_.output_timer( dict, names::time_omp_synchronization_construction ); - sw_omp_synchronization_simulation_.output_timer( dict, names::time_omp_synchronization_simulation ); - sw_mpi_synchronization_.output_timer( dict, names::time_mpi_synchronization ); + sw_omp_synchronization_construction_.output_timer( + dict, names::time_omp_synchronization_construction, names::time_omp_synchronization_construction_cpu ); + sw_omp_synchronization_simulation_.output_timer( + dict, names::time_omp_synchronization_simulation, names::time_omp_synchronization_simulation_cpu ); + sw_mpi_synchronization_.output_timer( dict, names::time_mpi_synchronization, names::time_mpi_synchronization_cpu ); } void diff --git a/nestkernel/nest_names.cpp b/nestkernel/nest_names.cpp index f2acfb8bf4..f3992392f1 100644 --- a/nestkernel/nest_names.cpp +++ b/nestkernel/nest_names.cpp @@ -596,22 +596,38 @@ const Name threshold_voltage( "threshold_voltage" ); const Name tics_per_ms( "tics_per_ms" ); const Name tics_per_step( "tics_per_step" ); const Name time_collocate_spike_data( "time_collocate_spike_data" ); +const Name time_collocate_spike_data_cpu( "time_collocate_spike_data_cpu" ); const Name time_communicate_prepare( "time_communicate_prepare" ); +const Name time_communicate_prepare_cpu( "time_communicate_prepare_cpu" ); const Name time_communicate_spike_data( "time_communicate_spike_data" ); +const Name time_communicate_spike_data_cpu( "time_communicate_spike_data_cpu" ); const Name time_communicate_target_data( "time_communicate_target_data" ); +const Name time_communicate_target_data_cpu( "time_communicate_target_data_cpu" ); const Name time_construction_connect( "time_construction_connect" ); +const Name time_construction_connect_cpu( "time_construction_connect_cpu" ); const Name time_construction_create( "time_construction_create" ); +const Name time_construction_create_cpu( "time_construction_create_cpu" ); const Name time_deliver_secondary_data( "time_deliver_secondary_data" ); +const Name time_deliver_secondary_data_cpu( "time_deliver_secondary_data_cpu" ); const Name time_deliver_spike_data( "time_deliver_spike_data" ); +const Name time_deliver_spike_data_cpu( "time_deliver_spike_data_cpu" ); const Name time_gather_secondary_data( "time_gather_secondary_data" ); +const Name time_gather_secondary_data_cpu( "time_gather_secondary_data_cpu" ); const Name time_gather_spike_data( "time_gather_spike_data" ); +const Name time_gather_spike_data_cpu( "time_gather_spike_data_cpu" ); const Name time_gather_target_data( "time_gather_target_data" ); +const Name time_gather_target_data_cpu( "time_gather_target_data_cpu" ); const Name time_omp_synchronization_construction( "time_omp_synchronization_construction" ); +const Name time_omp_synchronization_construction_cpu( "time_omp_synchronization_construction_cpu" ); const Name time_omp_synchronization_simulation( "time_omp_synchronization_simulation" ); +const Name time_omp_synchronization_simulation_cpu( "time_omp_synchronization_simulation_cpu" ); const Name time_mpi_synchronization( "time_mpi_synchronization" ); +const Name time_mpi_synchronization_cpu( "time_mpi_synchronization_cpu" ); const Name time_in_steps( "time_in_steps" ); const Name time_simulate( "time_simulate" ); +const Name time_simulate_cpu( "time_simulate_cpu" ); const Name time_update( "time_update" ); +const Name time_update_cpu( "time_update_cpu" ); const Name times( "times" ); const Name to_do( "to_do" ); const Name total_num_virtual_procs( "total_num_virtual_procs" ); diff --git a/nestkernel/nest_names.h b/nestkernel/nest_names.h index 1acfddb70a..759b3b32b4 100644 --- a/nestkernel/nest_names.h +++ b/nestkernel/nest_names.h @@ -624,22 +624,38 @@ extern const Name threshold_voltage; extern const Name tics_per_ms; extern const Name tics_per_step; extern const Name time_collocate_spike_data; +extern const Name time_collocate_spike_data_cpu; extern const Name time_communicate_prepare; +extern const Name time_communicate_prepare_cpu; extern const Name time_communicate_spike_data; +extern const Name time_communicate_spike_data_cpu; extern const Name time_communicate_target_data; +extern const Name time_communicate_target_data_cpu; extern const Name time_construction_connect; +extern const Name time_construction_connect_cpu; extern const Name time_construction_create; +extern const Name time_construction_create_cpu; extern const Name time_deliver_secondary_data; +extern const Name time_deliver_secondary_data_cpu; extern const Name time_deliver_spike_data; +extern const Name time_deliver_spike_data_cpu; extern const Name time_gather_secondary_data; +extern const Name time_gather_secondary_data_cpu; extern const Name time_gather_spike_data; +extern const Name time_gather_spike_data_cpu; extern const Name time_gather_target_data; +extern const Name time_gather_target_data_cpu; extern const Name time_omp_synchronization_construction; +extern const Name time_omp_synchronization_construction_cpu; extern const Name time_omp_synchronization_simulation; +extern const Name time_omp_synchronization_simulation_cpu; extern const Name time_mpi_synchronization; +extern const Name time_mpi_synchronization_cpu; extern const Name time_in_steps; extern const Name time_simulate; +extern const Name time_simulate_cpu; extern const Name time_update; +extern const Name time_update_cpu; extern const Name times; extern const Name to_do; extern const Name total_num_virtual_procs; diff --git a/nestkernel/node_manager.cpp b/nestkernel/node_manager.cpp index 7708a787cb..632b750687 100644 --- a/nestkernel/node_manager.cpp +++ b/nestkernel/node_manager.cpp @@ -786,7 +786,7 @@ void NodeManager::get_status( DictionaryDatum& d ) { def< long >( d, names::network_size, size() ); - sw_construction_create_.output_timer( d, names::time_construction_create ); + sw_construction_create_.output_timer( d, names::time_construction_create, names::time_construction_create_cpu ); } void diff --git a/nestkernel/simulation_manager.cpp b/nestkernel/simulation_manager.cpp index a756104e55..7a94877298 100644 --- a/nestkernel/simulation_manager.cpp +++ b/nestkernel/simulation_manager.cpp @@ -474,14 +474,15 @@ nest::SimulationManager::get_status( DictionaryDatum& d ) def< double >( d, names::min_update_time, min_update_time_ ); def< double >( d, names::max_update_time, max_update_time_ ); - sw_simulate_.output_timer( d, names::time_simulate ); - sw_communicate_prepare_.output_timer( d, names::time_communicate_prepare ); - sw_gather_spike_data_.output_timer( d, names::time_gather_spike_data ); - sw_gather_secondary_data_.output_timer( d, names::time_gather_secondary_data ); - sw_update_.output_timer( d, names::time_update ); - sw_gather_target_data_.output_timer( d, names::time_gather_target_data ); - sw_deliver_spike_data_.output_timer( d, names::time_deliver_spike_data ); - sw_deliver_secondary_data_.output_timer( d, names::time_deliver_secondary_data ); + sw_simulate_.output_timer( d, names::time_simulate, names::time_simulate_cpu ); + sw_communicate_prepare_.output_timer( d, names::time_communicate_prepare, names::time_communicate_prepare_cpu ); + sw_gather_spike_data_.output_timer( d, names::time_gather_spike_data, names::time_gather_spike_data_cpu ); + sw_gather_secondary_data_.output_timer( d, names::time_gather_secondary_data, names::time_gather_secondary_data_cpu ); + sw_update_.output_timer( d, names::time_update, names::time_update_cpu ); + sw_gather_target_data_.output_timer( d, names::time_gather_target_data, names::time_gather_target_data_cpu ); + sw_deliver_spike_data_.output_timer( d, names::time_deliver_spike_data, names::time_deliver_spike_data_cpu ); + sw_deliver_secondary_data_.output_timer( + d, names::time_deliver_secondary_data, names::time_deliver_secondary_data_cpu ); def< double >( d, names::eprop_update_interval, eprop_update_interval_ ); def< double >( d, names::eprop_learning_window, eprop_learning_window_ ); def< bool >( d, names::eprop_reset_neurons_on_update, eprop_reset_neurons_on_update_ ); diff --git a/nestkernel/stopwatch.h b/nestkernel/stopwatch.h index 72bc35fdb7..60c6dba373 100644 --- a/nestkernel/stopwatch.h +++ b/nestkernel/stopwatch.h @@ -73,25 +73,29 @@ constexpr bool use_threaded_timers = false; * // ^ intermediate timing without stopping the stopwatch * * // ... more computations 1.7643 min * * x.stop(); * - * x.print("Time needed ", StopwatchBase::MINUTES, std::cerr); * + * x.print("Time needed ", StopwatchBase< clock_type >::MINUTES, std::cerr); * * // > Time needed 1,8593 min. (on cerr) * * // other units and output streams possible * ********************************************************************************/ +namespace timers +{ +enum timeunit_t : size_t +{ + NANOSEC = 1, + MICROSEC = NANOSEC * 1000, + MILLISEC = MICROSEC * 1000, + SECONDS = MILLISEC * 1000, + MINUTES = SECONDS * 60, + HOURS = MINUTES * 60, + DAYS = HOURS * 24 +}; + +template < size_t clock_type > class StopwatchBase { public: typedef size_t timestamp_t; - enum timeunit_t : size_t - { - MICROSEC = 1, - MILLISEC = MICROSEC * 1000, - SECONDS = MILLISEC * 1000, - MINUTES = SECONDS * 60, - HOURS = MINUTES * 60, - DAYS = HOURS * 24 - }; - /** * Creates a stopwatch that is not running. */ @@ -137,7 +141,7 @@ class StopwatchBase * Convenient method for writing time in seconds * to some ostream. */ - friend std::ostream& operator<<( std::ostream& os, const StopwatchBase& stopwatch ); + // friend std::ostream& operator<<( std::ostream& os, const StopwatchBase< clock_type >& stopwatch ); private: #ifndef DISABLE_TIMING @@ -152,8 +156,9 @@ class StopwatchBase static size_t get_current_time(); }; +template < size_t clock_type > inline void -StopwatchBase::start() +StopwatchBase< clock_type >::start() { #ifndef DISABLE_TIMING if ( not isRunning() ) @@ -165,8 +170,9 @@ StopwatchBase::start() #endif } +template < size_t clock_type > inline void -StopwatchBase::stop() +StopwatchBase< clock_type >::stop() { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -177,8 +183,9 @@ StopwatchBase::stop() #endif } +template < size_t clock_type > inline bool -StopwatchBase::isRunning() const +StopwatchBase< clock_type >::isRunning() const { #ifndef DISABLE_TIMING return _running; @@ -187,8 +194,9 @@ StopwatchBase::isRunning() const #endif } +template < size_t clock_type > inline double -StopwatchBase::elapsed( timeunit_t timeunit ) const +StopwatchBase< clock_type >::elapsed( timeunit_t timeunit ) const { #ifndef DISABLE_TIMING size_t time_elapsed; @@ -202,14 +210,15 @@ StopwatchBase::elapsed( timeunit_t timeunit ) const // stopped before, get time of current measurement + last measurements time_elapsed = _end - _beg + _prev_elapsed; } - return time_elapsed / timeunit; + return static_cast< double >( time_elapsed ) / timeunit; #else return 0.; #endif } +template < size_t clock_type > inline void -StopwatchBase::reset() +StopwatchBase< clock_type >::reset() { #ifndef DISABLE_TIMING _beg = 0; // invariant: _end >= _beg @@ -219,8 +228,9 @@ StopwatchBase::reset() #endif } +template < size_t clock_type > inline void -StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const +StopwatchBase< clock_type >::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const { #ifndef DISABLE_TIMING double e = elapsed( timeunit ); @@ -254,21 +264,24 @@ StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& os ) c #endif } +template < size_t clock_type > inline size_t -StopwatchBase::get_current_time() +StopwatchBase< clock_type >::get_current_time() { // We use a monotonic timer to make sure the stopwatch is not influenced by time jumps (e.g. summer/winter time). - struct timeval now; - gettimeofday( &now, static_cast< struct timezone* >( nullptr ) ); - return ( StopwatchBase::timestamp_t ) now.tv_usec - + ( StopwatchBase::timestamp_t ) now.tv_sec * StopwatchBase::SECONDS; + timespec now; + clock_gettime( clock_type, &now ); + return now.tv_nsec + now.tv_sec * timeunit_t::SECONDS; } +/*template< size_t clock_type > inline std::ostream& -operator<<( std::ostream& os, const StopwatchBase& stopwatch ) +operator<<( std::ostream& os, const StopwatchBase< clock_type >& stopwatch ) { - stopwatch.print( "", StopwatchBase::timeunit_t::SECONDS, os ); + stopwatch.print( "", timeunit_t::SECONDS, os ); return os; +}*/ + } enum StopwatchVerbosity @@ -293,24 +306,24 @@ template < StopwatchVerbosity detailed_timer, StopwatchType threaded_timer, type class Stopwatch { public: - void - output_timer( DictionaryDatum& d, const Name& name ) - { - def< double >( d, name, timer_.elapsed() ); - } - void start() { #pragma omp master - timer_.start(); + { + walltime_timer_.start(); + cputime_timer_.start(); + } } void stop() { #pragma omp master - timer_.stop(); + { + walltime_timer_.stop(); + cputime_timer_.stop(); + } } bool @@ -319,18 +332,18 @@ class Stopwatch bool isRunning = false; #pragma omp master { - isRunning = timer_.isRunning(); + isRunning = walltime_timer_.isRunning(); }; return isRunning; } double - elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const + elapsed( timers::timeunit_t timeunit = timers::timeunit_t::SECONDS ) const { double elapsed = 0.; #pragma omp master { - elapsed = timer_.elapsed( timeunit ); + elapsed = walltime_timer_.elapsed( timeunit ); }; return elapsed; } @@ -339,20 +352,31 @@ class Stopwatch reset() { #pragma omp master - timer_.reset(); + { + walltime_timer_.reset(); + cputime_timer_.reset(); + } } void print( const char* msg = "", - StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, + timers::timeunit_t timeunit = timers::timeunit_t::SECONDS, std::ostream& os = std::cout ) const { #pragma omp master - timer_.print( msg, timeunit, os ); + walltime_timer_.print( msg, timeunit, os ); + } + + void + output_timer( DictionaryDatum& d, const Name& walltime_name, const Name& cputime_name ) + { + def< double >( d, walltime_name, walltime_timer_.elapsed() ); + def< double >( d, cputime_name, cputime_timer_.elapsed() ); } private: - StopwatchBase timer_; + timers::StopwatchBase< CLOCK_MONOTONIC > walltime_timer_; + timers::StopwatchBase< CLOCK_THREAD_CPUTIME_ID > cputime_timer_; }; /** If the user deactivated detailed timers, Stopwatch instance with the detailed flag will become an empty Stopwatch, @@ -361,6 +385,7 @@ class Stopwatch template <> class Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly, std::enable_if< not use_detailed_timers > > { +public: void start() { @@ -375,7 +400,7 @@ class Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly, std::e return false; } double - elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const + elapsed( timers::timeunit_t = timers::timeunit_t::SECONDS ) const { return 0; } @@ -384,9 +409,11 @@ class Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly, std::e { } void - print( const char* msg = "", - StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, - std::ostream& os = std::cout ) const + print( const char* = "", timers::timeunit_t = timers::timeunit_t::SECONDS, std::ostream& = std::cout ) const + { + } + void + output_timer( DictionaryDatum&, const Name&, const Name& ) { } }; @@ -400,6 +427,7 @@ class Stopwatch< detailed_timer, std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Detailed and not use_detailed_timers ) > > { +public: void start() { @@ -414,7 +442,7 @@ class Stopwatch< detailed_timer, return false; } double - elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const + elapsed( timers::timeunit_t = timers::timeunit_t::SECONDS ) const { return 0; } @@ -423,9 +451,11 @@ class Stopwatch< detailed_timer, { } void - print( const char* msg = "", - StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, - std::ostream& os = std::cout ) const + print( const char* = "", timers::timeunit_t = timers::timeunit_t::SECONDS, std::ostream& = std::cout ) const + { + } + void + output_timer( DictionaryDatum&, const Name&, const Name& ) { } }; @@ -435,31 +465,40 @@ class Stopwatch< detailed_timer, std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > > { public: - void - output_timer( DictionaryDatum& d, const Name& name ) - { - std::vector< double > times( timers_.size() ); - std::transform( - timers_.begin(), timers_.end(), times.begin(), []( const StopwatchBase& timer ) { return timer.elapsed(); } ); - def< ArrayDatum >( d, name, ArrayDatum( times ) ); - } - void start(); void stop(); bool isRunning() const; - double elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const; + double elapsed( timers::timeunit_t timeunit = timers::timeunit_t::SECONDS ) const; void reset(); void print( const char* msg = "", - StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS, + timers::timeunit_t timeunit = timers::timeunit_t::SECONDS, std::ostream& os = std::cout ) const; + void + output_timer( DictionaryDatum& d, const Name& walltime_name, const Name& cputime_name ) + { + std::vector< double > wall_times( walltime_timers_.size() ); + std::transform( walltime_timers_.begin(), + walltime_timers_.end(), + wall_times.begin(), + []( const timers::StopwatchBase< CLOCK_MONOTONIC >& timer ) { return timer.elapsed(); } ); + def< ArrayDatum >( d, walltime_name, ArrayDatum( wall_times ) ); + std::vector< double > cpu_times( cputime_timers_.size() ); + std::transform( cputime_timers_.begin(), + cputime_timers_.end(), + cpu_times.begin(), + []( const timers::StopwatchBase< CLOCK_THREAD_CPUTIME_ID >& timer ) { return timer.elapsed(); } ); + def< ArrayDatum >( d, cputime_name, ArrayDatum( cpu_times ) ); + } + private: - std::vector< StopwatchBase > timers_; + std::vector< timers::StopwatchBase< CLOCK_MONOTONIC > > walltime_timers_; + std::vector< timers::StopwatchBase< CLOCK_THREAD_CPUTIME_ID > > cputime_timers_; }; } /* namespace timer */ diff --git a/nestkernel/stopwatch_impl.h b/nestkernel/stopwatch_impl.h index 339a073643..b467b598e8 100644 --- a/nestkernel/stopwatch_impl.h +++ b/nestkernel/stopwatch_impl.h @@ -33,7 +33,10 @@ Stopwatch< detailed_timer, std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::start() { - timers_[ kernel().vp_manager.get_thread_id() ].start(); + kernel().vp_manager.assert_thread_parallel(); + + walltime_timers_[ kernel().vp_manager.get_thread_id() ].start(); + cputime_timers_[ kernel().vp_manager.get_thread_id() ].start(); } template < StopwatchVerbosity detailed_timer > @@ -43,7 +46,10 @@ Stopwatch< detailed_timer, std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::stop() { - timers_[ kernel().vp_manager.get_thread_id() ].stop(); + kernel().vp_manager.assert_thread_parallel(); + + walltime_timers_[ kernel().vp_manager.get_thread_id() ].stop(); + cputime_timers_[ kernel().vp_manager.get_thread_id() ].stop(); } template < StopwatchVerbosity detailed_timer > @@ -53,7 +59,9 @@ Stopwatch< detailed_timer, std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::isRunning() const { - return timers_[ kernel().vp_manager.get_thread_id() ].isRunning(); + kernel().vp_manager.assert_thread_parallel(); + + return walltime_timers_[ kernel().vp_manager.get_thread_id() ].isRunning(); } template < StopwatchVerbosity detailed_timer > @@ -61,10 +69,26 @@ double Stopwatch< detailed_timer, StopwatchType::Threaded, std::enable_if_t< use_threaded_timers - and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::elapsed( StopwatchBase::timeunit_t + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::elapsed( timers::timeunit_t timeunit ) const { - return timers_[ kernel().vp_manager.get_thread_id() ].elapsed( timeunit ); + kernel().vp_manager.assert_thread_parallel(); + + return walltime_timers_[ kernel().vp_manager.get_thread_id() ].elapsed( timeunit ); +} + +template < StopwatchVerbosity detailed_timer > +void +Stopwatch< detailed_timer, + StopwatchType::Threaded, + std::enable_if_t< use_threaded_timers + and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::print( const char* msg, + timers::timeunit_t timeunit, + std::ostream& os ) const +{ + kernel().vp_manager.assert_thread_parallel(); + + walltime_timers_[ kernel().vp_manager.get_thread_id() ].print( msg, timeunit, os ); } template < StopwatchVerbosity detailed_timer > @@ -74,11 +98,15 @@ Stopwatch< detailed_timer, std::enable_if_t< use_threaded_timers and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::reset() { + kernel().vp_manager.assert_single_threaded(); + const size_t num_threads = kernel().vp_manager.get_num_threads(); - timers_.resize( num_threads ); + walltime_timers_.resize( num_threads ); + cputime_timers_.resize( num_threads ); for ( size_t i = 0; i < num_threads; ++i ) { - timers_[ i ].reset(); + walltime_timers_[ i ].reset(); + cputime_timers_[ i ].reset(); } } } From d60db69de9449f757c89562c0fc003b0a6112303 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Mon, 16 Dec 2024 16:01:07 +0100 Subject: [PATCH 05/11] Fixed timer outputting --- nestkernel/connection_manager.cpp | 2 +- nestkernel/event_delivery_manager.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nestkernel/connection_manager.cpp b/nestkernel/connection_manager.cpp index 4001867308..648d6c01ea 100644 --- a/nestkernel/connection_manager.cpp +++ b/nestkernel/connection_manager.cpp @@ -227,7 +227,7 @@ nest::ConnectionManager::get_status( DictionaryDatum& dict ) def< bool >( dict, names::keep_source_table, keep_source_table_ ); def< bool >( dict, names::use_compressed_spikes, use_compressed_spikes_ ); - def< double >( dict, names::time_construction_connect, sw_construction_connect.elapsed() ); + sw_construction_connect.output_timer( dict, names::time_construction_connect, names::time_construction_connect_cpu ); ArrayDatum connection_rules; for ( auto const& element : *connruledict_ ) diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index d241828c0c..ede6812272 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -192,7 +192,7 @@ EventDeliveryManager::get_status( DictionaryDatum& dict ) sw_collocate_spike_data_.output_timer( dict, names::time_collocate_spike_data, names::time_collocate_spike_data_cpu ); sw_communicate_spike_data_.output_timer( - dict, names::time_communicate_spike_data, names::time_collocate_spike_data_cpu ); + dict, names::time_communicate_spike_data, names::time_communicate_spike_data_cpu ); sw_communicate_target_data_.output_timer( dict, names::time_communicate_target_data, names::time_communicate_target_data_cpu ); } From 88e0653d9ad3287a2efd4d9586f2f0fc3e7d5319 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Mon, 16 Dec 2024 16:45:50 +0100 Subject: [PATCH 06/11] Fixed MUSIC build --- nestkernel/simulation_manager.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nestkernel/simulation_manager.cpp b/nestkernel/simulation_manager.cpp index 7a94877298..acd980d8da 100644 --- a/nestkernel/simulation_manager.cpp +++ b/nestkernel/simulation_manager.cpp @@ -885,9 +885,9 @@ nest::SimulationManager::update_() // MUSIC *before* MUSIC time is advanced // wait until all threads are done -> synchronize - kernel().get_omp_synchronization_run_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_run_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block #pragma omp master @@ -1099,9 +1099,9 @@ nest::SimulationManager::update_() #ifdef HAVE_SIONLIB kernel().io_manager.post_step_hook(); // enforce synchronization after post-step activities of the recording backends - kernel().get_omp_synchronization_run_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_run_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); #endif const double end_current_update = sw_simulate_.elapsed(); From 3d90363bdf6ff81fdf31418f644effe1463bd811 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Mon, 16 Dec 2024 17:05:32 +0100 Subject: [PATCH 07/11] Fixed MPI sync timer --- cmake/ConfigureSummary.cmake | 7 +++++++ libnestutil/config.h.in | 3 +++ 2 files changed, 10 insertions(+) diff --git a/cmake/ConfigureSummary.cmake b/cmake/ConfigureSummary.cmake index c0b7e24e8b..686edb64bb 100644 --- a/cmake/ConfigureSummary.cmake +++ b/cmake/ConfigureSummary.cmake @@ -157,6 +157,13 @@ function( NEST_PRINT_CONFIG_SUMMARY ) message( "Threaded timers : No" ) endif () + message( "" ) + if ( THREADED_TIMERS ) + message( "MPI sync timer : Yes" ) + else () + message( "MPI sync timer : No" ) + endif () + message( "" ) if ( HAVE_MUSIC ) message( "Use MUSIC : Yes (MUSIC ${MUSIC_VERSION})" ) diff --git a/libnestutil/config.h.in b/libnestutil/config.h.in index ddf3921773..70998df6f3 100644 --- a/libnestutil/config.h.in +++ b/libnestutil/config.h.in @@ -185,6 +185,9 @@ /* Whether to use one NEST internal timer per thread */ #cmakedefine THREADED_TIMERS 1 +/* Whether to use the mpi synchronization timer (including an additional barrier) */ +#cmakedefine MPI_SYNC_TIMER 1 + /* Whether to do full logging */ #cmakedefine ENABLE_FULL_LOGGING 1 From 217adc7fe8345d8ebf328ce83966a7f96dcb9123 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Mon, 16 Dec 2024 17:34:00 +0100 Subject: [PATCH 08/11] Updated timer docs --- doc/htmldoc/nest_behavior/built-in_timers.rst | 77 ++++++++++++------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/doc/htmldoc/nest_behavior/built-in_timers.rst b/doc/htmldoc/nest_behavior/built-in_timers.rst index b57e0bda31..62b61e72a4 100644 --- a/doc/htmldoc/nest_behavior/built-in_timers.rst +++ b/doc/htmldoc/nest_behavior/built-in_timers.rst @@ -6,11 +6,9 @@ Built-in timers Basic timers ------------ -Basic built-in timers keep track of the time NEST spent for network -construction and actual simulation (propagation of the network -state). These timers are active in all simulations with NEST, and the -measured times can be checked by querying the corresponding kernel -attributes. For example: +Basic built-in timers keep track of the time NEST spent for network construction and actual simulation (propagation of +the network state). These timers are active in all simulations with NEST, and the measured times can be checked by +querying the corresponding kernel attributes. For example: :: @@ -33,19 +31,14 @@ The following basic time measurements are available: .. note :: - While preparing the actual simulation after network construction, - NEST needs to build the pre-synaptic part of the connection - infrastructure, which requires MPI communication (`Jordan et - al. 2018 `__). This - happens only for the first call to ``Simulate()`` unless - connectivity changed in the meantime, and it may cause significant - overhead by adding to ``time_simulate``. Therefore, the cumulative - time NEST spent for building the pre-synaptic connection - infrastructure is also tracked by a basic timer and available as - kernel attribute ``time_communicate_prepare``. + While preparing the actual simulation after network construction, NEST needs to build the pre-synaptic part of the + connection infrastructure, which requires MPI communication (`Jordan et al. 2018 + `__). This happens only for the first call to ``Simulate()`` unless + connectivity changed in the meantime, and it may cause significant overhead by adding to ``time_simulate``. + Therefore, the cumulative time NEST spent for building the pre-synaptic connection infrastructure is also tracked by + a basic timer and available as kernel attribute ``time_communicate_prepare``. -In the context of NEST performance monitoring, other useful kernel -attributes are: +In the context of NEST performance monitoring, other useful kernel attributes are: +-----------------------+----------------------------------+ |Name |Explanation | @@ -60,22 +53,17 @@ attributes are: .. note :: - ``nest.ResetKernel()`` resets all time measurements as well as - ``biological_time`` and ``local_spike_counter``. + ``nest.ResetKernel()`` resets all time measurements as well as ``biological_time`` and ``local_spike_counter``. Detailed timers --------------- -Detailed built-in timers can be activated (and again deactivated) -prior to compilation through the CMake flag -``-Dwith-detailed-timers=ON``. They provide further insights into the -time NEST spends in different phases of the simulation cycle, but they -can impact the runtime. Therefore, detailed timers are by default -inactive. +Detailed built-in timers can be activated (and again deactivated) prior to compilation through the CMake flag +``-Dwith-detailed-timers=ON``. They provide further insights into the time NEST spends in different phases of the +simulation cycle, but they can impact the runtime. Therefore, detailed timers are by default inactive. -If detailed timers are active, the following time measurements are -available as kernel attributes: +If detailed timers are active, the following time measurements are available as kernel attributes: +--------------------------------+----------------------------------+----------------------------------+ |Name |Explanation |Part of | @@ -107,7 +95,38 @@ available as kernel attributes: | |buffers of the corresponding | | | |postsynaptic neurons | | +--------------------------------+----------------------------------+----------------------------------+ +|``time_communicate_spike_data`` |Time for communicating spikes |``time_gather_spike_data`` | +| |between compute nodes | | ++--------------------------------+----------------------------------+----------------------------------+ +|``time_communicate_spike_data`` |Time for communicating spikes |``time_gather_spike_data`` | +| |between compute nodes | | ++--------------------------------+----------------------------------+----------------------------------+ +MPI synchronization timer +------------------------- +In order to measure synchronization time between multiple MPI processes, an additional timer can be activated on demand +via the ``-Dwith-mpi-sync-timer=ON`` CMake flag. This timer measures the time between the end of a process' update phase +(i.e., neuron state propagation) and start of collective communication of spikes between all MPI processes. This timer +adds an additional MPI barrier right before the start of communication, which might affect performance. + ++-----------------------------+---------------------------------------+ +|Name |Explanation | ++=============================+=======================================+ +|``time_mpi_synchronization`` |Time spent waiting for other processes.| ++-----------------------------+---------------------------------------+ + +Multi-threaded timers +--------------------- +In previous NEST versions, only the master thread measured timers. Since NEST 3.9, all timers that are recorded in a +parallel (multi-threaded) environment are recorded by each thread individually. + +The legacy timer behavior can be restored via the ``-Dwith-threaded-timers=OFF`` CMake flag. + +Wall-time vs. CPU-time +------------------------- +All timers in NEST measure the actual wall-time spent between starting and stopping the timer. In order to only measure +time spent on calculations, there is an additional variant for each of the timers above, suffixed with ``_cpu``. They +can be accessed in the exact same way. For example: +:: -with-mpi-sync-timer -with-threaded-timers + nest.time_simulate_cpu From 940ed7cb9ed6a58070af4acfed99451bea31f6b4 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang <47158055+JanVogelsang@users.noreply.github.com> Date: Mon, 16 Dec 2024 17:50:22 +0100 Subject: [PATCH 09/11] Updated timer documentation --- doc/htmldoc/nest_behavior/built-in_timers.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/htmldoc/nest_behavior/built-in_timers.rst b/doc/htmldoc/nest_behavior/built-in_timers.rst index 62b61e72a4..77191fd518 100644 --- a/doc/htmldoc/nest_behavior/built-in_timers.rst +++ b/doc/htmldoc/nest_behavior/built-in_timers.rst @@ -95,11 +95,9 @@ If detailed timers are active, the following time measurements are available as | |buffers of the corresponding | | | |postsynaptic neurons | | +--------------------------------+----------------------------------+----------------------------------+ -|``time_communicate_spike_data`` |Time for communicating spikes |``time_gather_spike_data`` | -| |between compute nodes | | +|``time_omp_synchronization_construction`` |Synchronization time of threads during network construction. |``time_construction_create``, ``time_construction_connect``, ``time_communicate_prepare`` | +--------------------------------+----------------------------------+----------------------------------+ -|``time_communicate_spike_data`` |Time for communicating spikes |``time_gather_spike_data`` | -| |between compute nodes | | +|``time_omp_synchronization_simulation`` |Synchronization time of threads during simulation. |``time_simulate`` | +--------------------------------+----------------------------------+----------------------------------+ MPI synchronization timer From 488cd09a1c74b3e09e460e7a6756681e66e6b9c5 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang <47158055+JanVogelsang@users.noreply.github.com> Date: Mon, 16 Dec 2024 17:53:27 +0100 Subject: [PATCH 10/11] Fixed stopwatch compatibility with C standard --- nestkernel/stopwatch.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nestkernel/stopwatch.h b/nestkernel/stopwatch.h index 60c6dba373..ffed7aa57c 100644 --- a/nestkernel/stopwatch.h +++ b/nestkernel/stopwatch.h @@ -90,7 +90,7 @@ enum timeunit_t : size_t DAYS = HOURS * 24 }; -template < size_t clock_type > +template < clockid_t clock_type > class StopwatchBase { public: @@ -156,7 +156,7 @@ class StopwatchBase static size_t get_current_time(); }; -template < size_t clock_type > +template < clockid_t clock_type > inline void StopwatchBase< clock_type >::start() { @@ -170,7 +170,7 @@ StopwatchBase< clock_type >::start() #endif } -template < size_t clock_type > +template < clockid_t clock_type > inline void StopwatchBase< clock_type >::stop() { @@ -183,7 +183,7 @@ StopwatchBase< clock_type >::stop() #endif } -template < size_t clock_type > +template < clockid_t clock_type > inline bool StopwatchBase< clock_type >::isRunning() const { @@ -194,7 +194,7 @@ StopwatchBase< clock_type >::isRunning() const #endif } -template < size_t clock_type > +template < clockid_t clock_type > inline double StopwatchBase< clock_type >::elapsed( timeunit_t timeunit ) const { @@ -216,7 +216,7 @@ StopwatchBase< clock_type >::elapsed( timeunit_t timeunit ) const #endif } -template < size_t clock_type > +template < clockid_t clock_type > inline void StopwatchBase< clock_type >::reset() { @@ -228,7 +228,7 @@ StopwatchBase< clock_type >::reset() #endif } -template < size_t clock_type > +template < clockid_t clock_type > inline void StopwatchBase< clock_type >::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const { @@ -264,7 +264,7 @@ StopwatchBase< clock_type >::print( const char* msg, timeunit_t timeunit, std::o #endif } -template < size_t clock_type > +template < clockid_t clock_type > inline size_t StopwatchBase< clock_type >::get_current_time() { @@ -274,13 +274,13 @@ StopwatchBase< clock_type >::get_current_time() return now.tv_nsec + now.tv_sec * timeunit_t::SECONDS; } -/*template< size_t clock_type > +template< clockid_t clock_type > inline std::ostream& operator<<( std::ostream& os, const StopwatchBase< clock_type >& stopwatch ) { stopwatch.print( "", timeunit_t::SECONDS, os ); return os; -}*/ +} } From 80bc6edc098e110e8087607664f329d3781aa015 Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Tue, 17 Dec 2024 14:57:51 +0100 Subject: [PATCH 11/11] Merged master and fixed formatting --- nestkernel/stopwatch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nestkernel/stopwatch.h b/nestkernel/stopwatch.h index ffed7aa57c..786a7d6ceb 100644 --- a/nestkernel/stopwatch.h +++ b/nestkernel/stopwatch.h @@ -274,7 +274,7 @@ StopwatchBase< clock_type >::get_current_time() return now.tv_nsec + now.tv_sec * timeunit_t::SECONDS; } -template< clockid_t clock_type > +template < clockid_t clock_type > inline std::ostream& operator<<( std::ostream& os, const StopwatchBase< clock_type >& stopwatch ) {