From 05f862a02277c3df230b52ed266beb1df809a9fa Mon Sep 17 00:00:00 2001 From: Bill Chen Date: Fri, 2 Aug 2024 16:15:09 +0800 Subject: [PATCH] Fix positions for pausing/resuming perf counters. Signed-off-by: Bill Chen --- CMakeLists.txt | 2 +- collectors/perf.cpp | 66 +++++++++++++++++++++++++++------------------ collectors/perf.hpp | 11 ++++++++ interface.cpp | 17 +++++------- 4 files changed, 58 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb1713b..5a38710 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ set(SRC_ROOT ".") option(VULKAN_LAYER "Build Vulkan layer" OFF) option(BUILD_STATIC "Build static version" ON) -add_definitions(-std=c++11 -Wall -fno-strict-aliasing -ggdb -Wno-deprecated-declarations) +add_definitions(-std=c++11 -Wall -Werror -fno-strict-aliasing -ggdb -Wno-deprecated-declarations) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") set(COLLECTOR_INCLUDES diff --git a/collectors/perf.cpp b/collectors/perf.cpp index a2cca30..47633b9 100644 --- a/collectors/perf.cpp +++ b/collectors/perf.cpp @@ -443,7 +443,44 @@ bool PerfCollector::collect(int64_t now) return true; } +bool PerfCollector::perf_counter_pause() { +#if defined(__aarch64__) + asm volatile("mrs %0, PMCNTENSET_EL0" : "=r" (PMCNTENSET_EL0_safe)); + // stop counters for arm64 + asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe)); + asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE)); +#elif defined(__arm__) + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(PMCNTENSET_EL0_safe)); + // stop counters for arm32 + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe)); + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE)); +#endif + return true; +} + +bool PerfCollector::perf_counter_resume() { +#if defined(__aarch64__) + // start counters for arm64 + asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe)); + asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe)); +#elif defined(__arm__) + // start counters for arm32 + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe)); + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe)); +#endif + return true; +} + + bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) { +#if defined(__x86_64__) + if (!attempt_collect_scope_x64) { + attempt_collect_scope_x64 = true; + DBG_LOG("WARNING: Frequent invocation of collect_scope on x64 devices may introduce " + "significant overhead to the kernel perf counter data.\n"); + } +#endif + if (!perf_counter_pause()) return false; if (!mCollecting) return false; struct snapshot snap; if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS) @@ -482,10 +519,12 @@ bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t f } } last_collect_scope_flags = flags; + if (!perf_counter_resume()) return false; return true; } bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) { + if (!perf_counter_pause()) return false; if (!mCollecting) return false; if (last_collect_scope_flags != flags) { DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id); @@ -537,6 +576,7 @@ bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t fl t.update_data_scope(func_id, snap_start, snap_stop); } } + if (!perf_counter_resume()) return false; return false; } @@ -749,21 +789,6 @@ struct snapshot event_context::collect(int64_t now) struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping) { - -#if defined(__aarch64__) - // stop counters for arm64 - uint64_t PMCNTENSET_EL0_safe; - uint64_t PMCR_EL0_safe; - asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe)); - asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE)); -#elif defined(__arm__) - // stop counters for arm32 - uint64_t PMCNTENSET_EL0_safe; - uint64_t PMCR_EL0_safe; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe)); - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE)); -#endif - if (stopping && last_snap_func_id != func_id) { DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id); } @@ -775,17 +800,6 @@ struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool last_snap_func_id = func_id; last_snap = snap; } - -#if defined(__aarch64__) - // start counters for arm64 - asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe)); - asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe)); -#elif defined(__arm__) - // start counters for arm32 - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe)); - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe)); -#endif - return snap; } diff --git a/collectors/perf.hpp b/collectors/perf.hpp index df29639..db7e90a 100644 --- a/collectors/perf.hpp +++ b/collectors/perf.hpp @@ -130,6 +130,11 @@ class event_context int fd; // Record accumulated values for update_data_scope, where the index of the vector is the uint16_t func_id. std::vector scope_values; + + counter() { + scope_values.reserve(512); + scope_values.resize(512, 0); + } }; int group; @@ -160,6 +165,8 @@ class PerfCollector : public Collector /// Collector functions for perapi perf instrumentations. virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags); virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags); + bool perf_counter_pause(); + bool perf_counter_resume(); private: void create_perf_thread(); @@ -176,6 +183,10 @@ class PerfCollector : public Collector std::map> mClocks; // device_name -> clock_vector int last_collect_scope_flags = 0; + bool attempt_collect_scope_x64 = false; + uint64_t PMCNTENSET_EL0_safe = 0; + uint64_t PMCR_EL0_safe = 0; + struct perf_thread { perf_thread(const int tid, const std::string &name): tid(tid), name(name), eventCtx{} {} diff --git a/interface.cpp b/interface.cpp index 6151e13..ca26000 100644 --- a/interface.cpp +++ b/interface.cpp @@ -451,8 +451,9 @@ void Collection::collect(std::vector custom) } void Collection::collect_scope_start(uint16_t label, int32_t flags) { - const int64_t now = getTime(); - mScopeStartTime = now; + // Not getting the current time as it introduces huge kernel cycle overhead to the perf collector. + const int64_t now = 0; + // mScopeStartTime = now; for (Collector* c : mRunning) { if (!c->isThreaded()) @@ -460,17 +461,12 @@ void Collection::collect_scope_start(uint16_t label, int32_t flags) { c->collect_scope_start(now, label, flags); } } - mScopeStarted = true; } void Collection::collect_scope_stop(uint16_t label, int32_t flags) { - // A collect_scope_start and collect_scope_end pair is considered as one sample. - if (!mScopeStarted) { - DBG_LOG("WARNING: collect_scope_stop called without a corresponding collect_scope_start.\n"); - return; - } - const int64_t now = getTime(); - // Timing is ignored to avoid extreme large json outputs. + // Not getting the current time as it introduces huge kernel cycle overhead to the perf collector. + const int64_t now = 0; + // Timing is not enabled to avoid extreme large json outputs. // mTiming.push_back(now - mScopeStartTime); for (Collector* c : mRunning) { @@ -479,7 +475,6 @@ void Collection::collect_scope_stop(uint16_t label, int32_t flags) { c->collect_scope_stop(now, label, flags); } } - mScopeStarted = false; } Json::Value Collection::results()