From 845807501a966ba2aff65da782dc9d4225afdb31 Mon Sep 17 00:00:00 2001
From: Zhonghang Chi
Date: Tue, 14 Jan 2025 11:17:29 +0800
Subject: [PATCH] refine per-API perf functions

1. Replace the read() syscall with direct reads of the event counter registers.
2. Support the arm32 architecture event counter registers.
3. Only collect perf data for the current thread.
4. Modify test8 to exercise the new per-API functions.
5. Forbid the per-API functions on the ANDROID platform.
---
 collectors/collector_utility.hpp |   2 -
 collectors/perf.cpp              | 185 ++++++++++++++------
 collectors/perf.hpp              |  55 +++++----
 interface.cpp                    | 120 +++++++++++---------
 interface.hpp                    |  14 +--
 test.cpp                         | 160 ++++++++++++--------
 6 files changed, 255 insertions(+), 281 deletions(-)

diff --git a/collectors/collector_utility.hpp b/collectors/collector_utility.hpp
index ed14d04..6411afb 100644
--- a/collectors/collector_utility.hpp
+++ b/collectors/collector_utility.hpp
@@ -2,7 +2,6 @@

 // This module includes utilities for writing collectors

-#include
 #include
 #include "interface.hpp"
@@ -16,7 +15,6 @@ std::string getMidgardInstrOutputPath();

 // Hack to workaround strange missing support for std::to_string in Android
 #ifdef __ANDROID__
-#include
 #include

 template
diff --git a/collectors/perf.cpp b/collectors/perf.cpp
index 0c4894c..38b78de 100644
--- a/collectors/perf.cpp
+++ b/collectors/perf.cpp
@@ -1,23 +1,18 @@
 #include "perf.hpp"

 #include
-#include
 #include
 #include
-#include
+#include
+#include
+#include
+#include
 #include
-#include

 #if !defined(ANDROID)
 #include
 #else
 #include "perf_event.h"
 #endif
-#include
-#include
-#include
-#include
-#include
-#include

 static std::map> EVENTS = {
     {0, {
         {"CPUInstructionRetired", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, false, false, hw_cnt_length::b32, false},
@@ -79,8 +74,22 @@ static inline uint64_t makeup_booker_ci_config(int nodetype, int eventid, int by
     return config;
 }

-PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) : Collector(config, name)
+PerfCollector::PerfCollector(const Json::Value& config, const std::string& name, bool enablePerapiPerf) : Collector(config, name)
 {
+    mEnablePerapiPerf = enablePerapiPerf;
+// libcollector doesn't support any per-API function on ANDROID platforms.
+#if defined(ANDROID) || defined(__ANDROID__)
+    mEnablePerapiPerf = false;
+#elif defined(__aarch64__) || defined(__arm__)
+    if (mEnablePerapiPerf)
+    {
+        volatile uint64_t pmcr_el0;
+        asm volatile("mrs %0, PMCR_EL0" : "=r"(pmcr_el0));
+        pmu_counter_bits = ((pmcr_el0 & 0x80) == 0x80 ? 64 : 32);
+        DBG_LOG("pmu counter bits are: %u\n", pmu_counter_bits);
+        DBG_LOG("pmcr_el0 is: %lu\n", pmcr_el0);
+    }
+#endif
     struct event leader = {"CPUCycleCount", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, false, false, hw_cnt_length::b32};
     bool leaderOnce = true;
@@ -88,6 +97,7 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name)
     mInherit = mConfig.get("inherit", 1).asInt();

     leader.inherited = mInherit;
+    leader.cspmu = false;
     leader.device = "single";

     if ((0 <= mSet) && (mSet <= 3))
@@ -206,7 +216,7 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name)
         }
     }

-    mAllThread = mConfig.get("allthread", true).asBool();
+    mAllThread = mConfig.get("allthread", !mEnablePerapiPerf).asBool();
 }

 static inline long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
@@ -249,9 +259,12 @@ static int add_event(const struct event &e, int tid, int cpu, int group = -1)
 bool PerfCollector::init()
 {
     create_perf_thread();
-
     for (perf_thread& t : mReplayThreads)
     {
+        if (mEnablePerapiPerf)
+        {
+            t.eventCtx.setEnablePerApi();
+        }
         t.eventCtx.init(mEvents[t.device_name], t.tid, -1);
     }

@@ -415,36 +428,7 @@ bool PerfCollector::collect(int64_t now)
     return true;
 }

-bool PerfCollector::perf_counter_pause() {
-#if defined(__aarch64__)
-    asm volatile("mrs %0, PMCNTENSET_EL0" : "=r" (PMCNTENSET_EL0_safe));
-    // stop counters for arm64
-    asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe));
-    asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE));
-#elif defined(__arm__)
-    asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(PMCNTENSET_EL0_safe));
-    // stop counters for arm32
-    asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe));
-    asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE));
-#endif
-    return true;
-}
-
-bool PerfCollector::perf_counter_resume() {
-#if defined(__aarch64__)
-    // start counters for arm64
-    asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe));
-    asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe));
-#elif defined(__arm__)
-    // start counters for arm32
-    asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe));
-    asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe));
-#endif
-    return true;
-}
-
-
-bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) {
+bool PerfCollector::collect_scope_start(uint16_t func_id, int32_t flags, int tid) {
 #if defined(__x86_64__)
     if (!attempt_collect_scope_x64) {
         attempt_collect_scope_x64 = true;
                 "significant overhead to the kernel perf counter data.\n");
     }
 #endif
-    if (!perf_counter_pause()) return false;
     if (!mCollecting) return false;
     struct snapshot snap;
     if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS)
     {
-        for (perf_thread &t : mReplayThreads)
-        {
-            t.eventCtx.collect_scope(now, func_id, false);
-        }
-    }
-    if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS)
-    {
-        for (perf_thread &t : mBgThreads)
+        for (auto &thread: mReplayThreads)
         {
-            t.eventCtx.collect_scope(now, func_id, false);
-        }
-    }
-    if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS)
-    {
-        for (perf_thread &t : mBookerThread)
-        {
-            t.eventCtx.collect_scope(now, func_id, false);
-        }
-    }
-    if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS)
-    {
-        for (perf_thread &t : mCSPMUThreads)
-        {
-            t.eventCtx.collect_scope(now, func_id, false);
-        }
+            if (thread.tid == tid)
+            {
+                thread.eventCtx.collect_scope(func_id, false, get_pmu_bits());
+            }
+        }
     }
     last_collect_scope_flags = flags;
-    if (!perf_counter_resume()) return false;
     return true;
 }

-bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) {
-    if (!perf_counter_pause()) return false;
+bool PerfCollector::collect_scope_stop(uint16_t func_id, int32_t flags, int tid) {
     if (!mCollecting) return false;
     if (last_collect_scope_flags != flags) {
         DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
@@ -498,42 +461,17 @@ bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t fl
     struct snapshot snap_start, snap_stop;
     if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS)
     {
-        for (perf_thread &t : mReplayThreads)
-        {
-            snap_start = t.eventCtx.last_snap;
-            snap_stop = t.eventCtx.collect_scope(now, func_id, true);
-            t.update_data_scope(func_id, snap_start, snap_stop);
-        }
-    }
-    if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS)
-    {
-        for (perf_thread &t : mBgThreads)
-        {
-            snap_start = t.eventCtx.last_snap;
-            snap_stop = t.eventCtx.collect_scope(now, func_id, true);
-            t.update_data_scope(func_id, snap_start, snap_stop);
-        }
-    }
-    if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS)
-    {
-        for (perf_thread &t : mBookerThread)
+        for (auto &thread: mReplayThreads)
         {
-            snap_start = t.eventCtx.last_snap;
-            snap_stop = t.eventCtx.collect_scope(now, func_id, true);
-            t.update_data_scope(func_id, snap_start, snap_stop);
-        }
-    }
-    if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS)
-    {
-        for (perf_thread &t : mCSPMUThreads)
-        {
-            snap_start = t.eventCtx.last_snap;
-            snap_stop = t.eventCtx.collect_scope(now, func_id, true);
-            t.update_data_scope(func_id, snap_start, snap_stop);
+            if (thread.tid == tid)
+            {
+                snap_start = thread.eventCtx.last_snap;
+                snap_stop = thread.eventCtx.collect_scope(func_id, true, get_pmu_bits());
+                thread.update_data_scope(func_id, snap_start, snap_stop);
+            }
         }
     }
-    if (!perf_counter_resume()) return false;
-    return false;
+    return true;
 }

 bool PerfCollector::postprocess(const std::vector& timing)
@@ -672,6 +610,15 @@ bool event_context::deinit()
     return true;
 }

+
+#define F_BIT_0 ((uint32_t)0x00000001)
+#define F_BIT_2 ((uint32_t)0x00000004)
+#define F_BIT_3 ((uint32_t)0x00000008)
+#define CINSTRP_ARMV8_PMCR_E ((unsigned long long)F_BIT_0) /* Enable all counters */
+#define CINSTRP_ARMV8_PMCR_C ((unsigned long long)F_BIT_2) /* Cycle counter reset */
+#define CINSTRP_ARMV8_PMCR_R ((unsigned long long)F_BIT_3) /* Cycle counter reset */
+
+
 bool event_context::start()
 {
     if (ioctl(group, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1)
@@ -684,6 +631,24 @@ bool event_context::start()
         perror("ioctl PERF_EVENT_IOC_ENABLE");
         return false;
     }
+
+#if !defined(ANDROID) && !defined(__ANDROID__)
+    if (getEnablePerApi())
+    {
+        volatile uint64_t el0_access = 0;
+#if defined(__aarch64__)
+        asm volatile("mrs %0, PMUSERENR_EL0" : "=r"(el0_access));
+#elif defined(__arm__)
+        asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(el0_access));
+#endif
+        if ((el0_access & (CINSTRP_ARMV8_PMCR_E | CINSTRP_ARMV8_PMCR_C | CINSTRP_ARMV8_PMCR_R)) != (CINSTRP_ARMV8_PMCR_E | CINSTRP_ARMV8_PMCR_C | CINSTRP_ARMV8_PMCR_R))
+        {
+            DBG_LOG("EL0 access to PMU is required! Please set the appropriate bits in PMUSERENR_EL0. "
+                    "Current settings: %08x\n", (uint32_t)el0_access);
+            exit(EXIT_FAILURE);
+        }
+    }
+#endif
+
     return true;
 }
@@ -732,13 +697,25 @@ struct snapshot event_context::collect(int64_t now)
     return snap;
 }

-struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping)
+struct snapshot event_context::collect_scope(uint16_t func_id, bool stopping, uint8_t pmu_bits)
 {
     if (stopping && last_snap_func_id != func_id) {
         DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
+        exit(EXIT_FAILURE);
     }
     struct snapshot snap;
+#if defined(__aarch64__)
+    if (pmu_bits == 32)
+    {
+        asm volatile("mrs %0, PMCCNTR_EL0" : "=r"(snap.values[0]));
+    }
+    else
+    {
+        asm volatile("mrs %0, PMEVCNTR2_EL0" : "=r"(snap.values[0]));
+    }
+#else
     if (read(group, &snap, sizeof(snap)) == -1) perror("read");
+#endif
     if (stopping) {
         last_snap_func_id = -1;
     } else {
diff --git a/collectors/perf.hpp b/collectors/perf.hpp
index 3d7496d..c0c2ba2 100644
--- a/collectors/perf.hpp
+++ b/collectors/perf.hpp
@@ -1,11 +1,13 @@
 #pragma once

 #include "collector_utility.hpp"
-#include "interface.hpp"
+// #include "interface.hpp"
 #include
 #include
 #include
 #include
+#include
+#include

 enum hw_cnt_length
 {
@@ -50,8 +52,8 @@ enum collect_scope_flags: int32_t
 struct snapshot
 {
     snapshot() : size(0) {}
-    uint64_t size;
-    long long values[8] = {0};
+    unsigned long size;
+    unsigned long values[8] = {0};
 };

 struct event {
@@ -72,6 +74,7 @@ class event_context
 public:
     event_context()
     {
+        mEnablePerapiPerf = false;
         group = -1;
         last_snap_func_id = -1;
     }
@@ -82,14 +85,18 @@ class event_context
     bool start();

     struct snapshot collect(int64_t now);
-    struct snapshot collect_scope(int64_t now, uint16_t func_id, bool stopping);
+    struct snapshot collect_scope(uint16_t func_id, bool stopping, uint8_t pmu_bits);
     // If not -1, then we are in the middle of collect_scope_start/stop.
     uint16_t last_snap_func_id;
     struct snapshot last_snap;
+    int group_core;

     bool stop();
     bool deinit();
+    int getGroup() { return group; };
+    bool getEnablePerApi() { return mEnablePerapiPerf; };
+    void setEnablePerApi() { mEnablePerapiPerf = true; };

     inline void update_data(const struct snapshot &snap, CollectorValueResults &result)
     {
@@ -97,26 +104,24 @@ class event_context
             result[mCounters[i].name].push_back(snap.values[i]);
     }

-    inline void update_data_scope(uint16_t func_id, bool is_calling, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result)
+    inline void update_data_scope(uint16_t func_id, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result)
     {
         if (!mValueResults) mValueResults = &result;
-        long long diff_acc = 0;
+        uint64_t diff_acc = 0;
         for (unsigned int i = 0; i < mCounters.size(); i++)
         {
-            long long diff = snap_end.values[i] - snap_start.values[i];
+            uint64_t diff = snap_end.values[i] - snap_start.values[i];
             if (mCounters[i].scope_values.size() <= func_id) {
                 mCounters[i].scope_values.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0);
             }
             mCounters[i].scope_values[func_id] += diff;
             diff_acc += diff;
         }
-        if (diff_acc > 0 && is_calling) {
+        if (diff_acc > 0) {
             if (scope_num_calls.size() <= func_id) {
                 scope_num_calls.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0);
             }
             scope_num_calls[func_id]++;
-        }
-        if (diff_acc > 0) {
-            if (scope_num_with_perf.size() <= func_id) {
+            if (scope_num_with_perf.size() <= func_id) {
                 scope_num_with_perf.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0);
             }
             scope_num_with_perf[func_id]++;
@@ -129,7 +134,7 @@ class event_context
         std::string name;
         int fd;
         // Record accumulated values for update_data_scope, where the index of the vector is the uint16_t func_id.
-        std::vector scope_values;
+        std::vector scope_values;

        counter()
        {
            scope_values.reserve(512);
@@ -144,12 +149,13 @@ class event_context
    // Record number of scope calls that actually triggered the collect_scope (happen in 1 thread that calls the collection method)
    std::vector scope_num_calls;

    CollectorValueResults *mValueResults = nullptr;
+   bool mEnablePerapiPerf;
 };

 class PerfCollector : public Collector
 {
 public:
-    PerfCollector(const Json::Value& config, const std::string& name);
+    PerfCollector(const Json::Value& config, const std::string& name, bool enablePerapiPerf = false);

     virtual bool init() override;
     virtual bool deinit() override;
@@ -162,11 +168,11 @@ class PerfCollector : public Collector
     virtual bool postprocess(const std::vector& timing) override;
     virtual void summarize() override;

+    uint8_t get_pmu_bits() { return pmu_counter_bits; }
+
     /// Collector functions for perapi perf instrumentations.
-    virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) override;
-    virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) override;
-    bool perf_counter_pause();
-    bool perf_counter_resume();
+    virtual bool collect_scope_start(uint16_t func_id, int32_t flags, int tid) override;
+    virtual bool collect_scope_stop(uint16_t func_id, int32_t flags, int tid) override;

 private:
     void create_perf_thread();
@@ -176,6 +182,8 @@ class PerfCollector : public Collector
     int mSet = -1;
     int mInherit = 1;
     bool mAllThread = true;
+    bool mEnablePerapiPerf = false;
+    uint8_t pmu_counter_bits;
     std::vector mBookerEvents;
     std::map> mEvents;
     std::map> mCSPMUEvents;
@@ -197,8 +205,7 @@ class PerfCollector : public Collector

         void update_data_scope(uint16_t func_id, struct snapshot& snap_start, struct snapshot& snap_end)
         {
-            pid_t cur_tid = syscall(SYS_gettid);
-            eventCtx.update_data_scope(func_id, cur_tid == tid, snap_start, snap_end, mResultsPerThread);
+            eventCtx.update_data_scope(func_id, snap_start, snap_end, mResultsPerThread);
         }

         void clear()
@@ -217,16 +224,16 @@ class PerfCollector : public Collector
                 v[pair.first] = Json::arrayValue;

                 unsigned int index = 0;
-                int64_t total = 0;
+                uint64_t total = 0;
                 for (const CollectorValue& cv : pair.second.data())
                 {
-                    int64_t s = cv.i64;
-                    if (need_sum) s += value[pair.first][index++].asInt64();
-                    v[pair.first].append((Json::Value::Int64)s);
+                    uint64_t s = cv.u64;
+                    if (need_sum) s += value[pair.first][index++].asUInt64();
+                    v[pair.first].append((Json::Value::UInt64)s);
                     total += s;
                 }
                 value[pair.first] = v[pair.first];
-                value["SUM"][pair.first] = (Json::Value::Int64)total;
+                value["SUM"][pair.first] = (Json::Value::UInt64)total;
             }
         }
diff --git a/interface.cpp b/interface.cpp
index ca26000..a91c696 100644
--- a/interface.cpp
+++ b/interface.cpp
@@ -4,9 +4,8 @@
 #include
 #include
 #include
-#include
 #include
-#include
+#include

 #ifndef __APPLE__
 #include "collectors/perf.hpp"
@@ -188,7 +187,7 @@ SysfsCollector::~SysfsCollector()

 // ---------- COLLECTION ----------

-Collection::Collection(const std::string& config_str)
+Collection::Collection(const std::string& config_str, bool enablePerapiPerf) : mEnablePerapiPerf(enablePerapiPerf)
 {
     Json::Value jsonConfig;
@@ -204,56 +203,66 @@ Collection::Collection(const std::string& config_str)
     init_from_json(jsonConfig);
 }

-Collection::Collection(const Json::Value& config) : mConfig(config)
+Collection::Collection(const Json::Value& config, bool enablePerapiPerf) : mEnablePerapiPerf(enablePerapiPerf), mConfig(config)
 {
     init_from_json(config);
 }

 void Collection::init_from_json(const Json::Value& config)
 {
-    #ifndef __APPLE__
-    mCollectors.push_back(new PerfCollector(config, "perf"));
-    mCollectors.push_back(new SysfsCollector(config, "battery_temperature",
-        { "/sys/class/power_supply/battery/temp",
-          "/sys/devices/platform/android-battery/power_supply/android-battery/temp", // Nexus 10
-          "/sys/class/power_supply/battery/batt_temp" })); // teclast tpad-1
-    mCollectors.push_back(new CPUFreqCollector(config, "cpufreq"));
-    mCollectors.push_back(new SysfsCollector(config, "memfreq",
-        { "/sys/class/devfreq/exynos5-busfreq-mif/cur_freq", // note 3
-          "/sys/class/devfreq/exynos5-devfreq-mif/cur_freq", // note 4
-          "/sys/devices/17000010.devfreq_mif/devfreq/17000010.devfreq_mif/cur_freq" })); // Mali S7
-    mCollectors.push_back(new SysfsCollector(config, "memfreqdisplay",
-        { "/sys/devices/17000030.devfreq_disp/devfreq/17000030.devfreq_disp/cur_freq" })); // Mali S7
-    mCollectors.push_back(new SysfsCollector(config, "memfreqint",
-        { "/sys/class/devfreq/exynos5-busfreq-int/cur_freq", // note 3
-          "/sys/class/devfreq/exynos5-devfreq-int/cur_freq", // note 4
-          "/sys/devices/17000020.devfreq_int/devfreq/17000020.devfreq_int/cur_freq" })); // Mali S7
-    mCollectors.push_back(new SysfsCollector(config, "gpu_active_time",
-        { "/sys/devices/platform/mali.0/power/runtime_active_time", // mali
-          "/sys/devices/platform/pvrsrvkm.0/power/runtime_active_time", // power-vr
-          "/sys/devices/virtual/graphics/fb0/power/runtime_active_time" }, // adreno
-        true)); // accumulative value
-    mCollectors.push_back(new SysfsCollector(config, "gpu_suspended_time",
-        { "/sys/devices/platform/mali.0/power/runtime_suspended_time", // mali
-          "/sys/devices/platform/pvrsrvkm.0/power/runtime_suspended_time", // power-vr
-          "/sys/devices/virtual/graphics/fb0/power/runtime_suspended_time" }, // adreno (but only for framebuffer zero!)
-        true)); // accumulative value
-    mCollectors.push_back(new SysfsCollector(config, "cpufreqtrans",
-        { "/sys/devices/system/cpu/cpu0/cpufreq/stats/total_trans" },
-        true)); // accumulative value
-    if (config.isMember("debug") && config["debug"].asBool()) mDebug = true;
+#ifndef __APPLE__
+    if (mEnablePerapiPerf)
+    {
+        mCollectors.push_back(new PerfCollector(config, "perf", true));
+    }
+    else
+    {
+        mCollectors.push_back(new PerfCollector(config, "perf"));
+        mCollectors.push_back(new SysfsCollector(config, "battery_temperature",
+            { "/sys/class/power_supply/battery/temp",
+              "/sys/devices/platform/android-battery/power_supply/android-battery/temp", // Nexus 10
+              "/sys/class/power_supply/battery/batt_temp" })); // teclast tpad-1
+        mCollectors.push_back(new CPUFreqCollector(config, "cpufreq"));
+        mCollectors.push_back(new SysfsCollector(config, "memfreq",
+            { "/sys/class/devfreq/exynos5-busfreq-mif/cur_freq", // note 3
+              "/sys/class/devfreq/exynos5-devfreq-mif/cur_freq", // note 4
+              "/sys/devices/17000010.devfreq_mif/devfreq/17000010.devfreq_mif/cur_freq" })); // Mali S7
+        mCollectors.push_back(new SysfsCollector(config, "memfreqdisplay",
+            { "/sys/devices/17000030.devfreq_disp/devfreq/17000030.devfreq_disp/cur_freq" })); // Mali S7
+        mCollectors.push_back(new SysfsCollector(config, "memfreqint",
+            { "/sys/class/devfreq/exynos5-busfreq-int/cur_freq", // note 3
+              "/sys/class/devfreq/exynos5-devfreq-int/cur_freq", // note 4
+              "/sys/devices/17000020.devfreq_int/devfreq/17000020.devfreq_int/cur_freq" })); // Mali S7
+        mCollectors.push_back(new SysfsCollector(config, "gpu_active_time",
+            { "/sys/devices/platform/mali.0/power/runtime_active_time", // mali
+              "/sys/devices/platform/pvrsrvkm.0/power/runtime_active_time", // power-vr
+              "/sys/devices/virtual/graphics/fb0/power/runtime_active_time" }, // adreno
+            true)); // accumulative value
+        mCollectors.push_back(new SysfsCollector(config, "gpu_suspended_time",
+            { "/sys/devices/platform/mali.0/power/runtime_suspended_time", // mali
+              "/sys/devices/platform/pvrsrvkm.0/power/runtime_suspended_time", // power-vr
+              "/sys/devices/virtual/graphics/fb0/power/runtime_suspended_time" }, // adreno (but only for framebuffer zero!)
+            true)); // accumulative value
+        mCollectors.push_back(new SysfsCollector(config, "cpufreqtrans",
+            { "/sys/devices/system/cpu/cpu0/cpufreq/stats/total_trans" },
+            true)); // accumulative value
+        if (config.isMember("debug") && config["debug"].asBool()) mDebug = true;
 #if defined(ANDROID) || defined(__ANDROID__)
-    mCollectors.push_back(new StreamlineCollector(config, "streamline"));
+        mCollectors.push_back(new StreamlineCollector(config, "streamline"));
 #endif
-    mCollectors.push_back(new MemoryCollector(config, "memory"));
-    mCollectors.push_back(new CPUTemperatureCollector(config, "cputemp"));
-    mCollectors.push_back(new GPUFreqCollector(config, "gpufreq"));
-    mCollectors.push_back(new PowerDataCollector(config, "power"));
-    mCollectors.push_back(new FerretCollector(config, "ferret"));
-    mCollectors.push_back(new ProcFSStatCollector(config, "procfs"));
-    mCollectors.push_back(new MaliCounterCollector(config, "malicounters"));
+        mCollectors.push_back(new MemoryCollector(config, "memory"));
+        mCollectors.push_back(new CPUTemperatureCollector(config, "cputemp"));
+        mCollectors.push_back(new GPUFreqCollector(config, "gpufreq"));
+        mCollectors.push_back(new PowerDataCollector(config, "power"));
+        mCollectors.push_back(new FerretCollector(config, "ferret"));
+        mCollectors.push_back(new ProcFSStatCollector(config, "procfs"));
+        mCollectors.push_back(new MaliCounterCollector(config, "malicounters"));
+        // Various specializations
+        mCollectorMap["battery_temperature"]->doubleTransform(0.1); // divide by 10 and store as float
+    }
 #endif
-    mCollectors.push_back(new RusageCollector(config, "rusage"));
+    if (!mEnablePerapiPerf)
+        mCollectors.push_back(new RusageCollector(config, "rusage"));

     for (Collector* c : mCollectors)
     {
@@ -264,9 +273,6 @@ void Collection::init_from_json(const Json::Value& config)
         }
         mCollectorMap[c->name()] = c;
     }
-
-    // Various specializations
-    mCollectorMap["battery_temperature"]->doubleTransform(0.1); // divide by 10 and store as float
 }

 Collection::~Collection()
@@ -388,7 +394,14 @@ void Collection::start(const std::vector& headers)
         if (c->isThreaded())
         {
             c->finished = false;
-            c->thread = std::thread(&Collector::loop, c);
+            if (mEnablePerapiPerf)
+            {
+                c->thread = std::thread(std::function());
+            }
+            else
+            {
+                c->thread = std::thread(&Collector::loop, c);
+            }
             int failure = pthread_setname_np(
                 c->thread.native_handle(), c->name().c_str());
@@ -450,29 +463,26 @@ void Collection::collect(std::vector custom)
     }
 }

-void Collection::collect_scope_start(uint16_t label, int32_t flags) {
+void Collection::collect_scope_start(uint16_t label, int32_t flags, int tid) {
     // Not getting the current time as it introduces huge kernel cycle overhead to the perf collector.
-    const int64_t now = 0;
-    // mScopeStartTime = now;
     for (Collector* c : mRunning)
     {
         if (!c->isThreaded())
         {
-            c->collect_scope_start(now, label, flags);
+            c->collect_scope_start(label, flags, tid);
         }
     }
 }

-void Collection::collect_scope_stop(uint16_t label, int32_t flags) {
+void Collection::collect_scope_stop(uint16_t label, int32_t flags, int tid) {
     // Not getting the current time as it introduces huge kernel cycle overhead to the perf collector.
-    const int64_t now = 0;
     // Timing is not enabled to avoid extreme large json outputs.
     // mTiming.push_back(now - mScopeStartTime);
     for (Collector* c : mRunning)
     {
         if (!c->isThreaded())
         {
-            c->collect_scope_stop(now, label, flags);
+            c->collect_scope_stop(label, flags, tid);
         }
     }
 }
diff --git a/interface.hpp b/interface.hpp
index dce31cb..d7d6697 100644
--- a/interface.hpp
+++ b/interface.hpp
@@ -2,7 +2,6 @@

 #include
 #include
-#include
 #include
 #include
 #include
@@ -90,8 +89,8 @@ class Collector
     virtual bool stop() { mCollecting = false; return true; }
     virtual bool postprocess(const std::vector& timing);
     virtual bool collect( int64_t ) = 0;
-    virtual bool collect_scope_start( int64_t now, uint16_t func_id, int flags ) {return true; };
-    virtual bool collect_scope_stop( int64_t now, uint16_t func_id, int flags ) { return true; };
+    virtual bool collect_scope_start( uint16_t func_id, int flags, int tid) {return true; };
+    virtual bool collect_scope_stop( uint16_t func_id, int flags, int tid) { return true; };
     virtual bool collecting() const { return mCollecting; }
     virtual const std::string& name() const { return mName; }
     virtual bool available() = 0;
@@ -187,8 +186,8 @@ class SysfsCollector : public Collector
 class Collection
 {
 public:
-    Collection(const std::string& config_str);
-    Collection(const Json::Value& config);
+    Collection(const std::string& config_str, bool enablePerapiPerf = false);
+    Collection(const Json::Value& config, bool enablePerapiPerf = false);
     ~Collection();

     /// Return a list of functional collectors for this platform.
@@ -256,11 +255,11 @@ class Collection

     /// Sample periodical data for per API instrumentation. Call this method before the payload
     /// execution. Currently only used for perf collector.
-    void collect_scope_start(uint16_t label, int32_t flags);
+    void collect_scope_start(uint16_t label, int32_t flags, int tid);

     /// Sample periodical data for per API instrumentation. Call this method after the payload
     /// execution. Currently only used for perf collector.
-    void collect_scope_stop(uint16_t label, int32_t flags);
+    void collect_scope_stop(uint16_t label, int32_t flags, int tid);

     /// Get the results as JSON
     Json::Value results();
@@ -271,6 +270,7 @@ class Collection
     void init_from_json(const Json::Value& config);

     bool running = false;
+    bool mEnablePerapiPerf = false;
     Json::Value mConfig;
     std::vector mCollectors;
     std::vector mRunning;
diff --git a/test.cpp b/test.cpp
index dc4220d..5c56a2d 100644
--- a/test.cpp
+++ b/test.cpp
@@ -264,108 +264,90 @@ static void test7()
     c.writeCSV("excel.csv");
 }

-class Test8 {
+class Test8
+{
 public:
+    Test8() : test8_ready(false) {}

-    Test8() : test8_ready(false) {}
-
-    ~Test8() {
-        delete c;
-    }
-
-    void run() {
-        printf("[test 8]: Testing collect_scope for the perf collector...\n");
-        std::vector threads;
+    ~Test8()
+    {
+        delete c;
+    }

-        // Specification:
-        // https://github.com/ARM-software/patrace/blob/master/patrace/doc/manual.md#generating-cpu-load-with-perf-collector
-        std::string collectorConfig = R"(
+    void run()
     {
-        "perf": {
-            "set": 4,
-            "event": [
-                {
-                    "name": "CPUCyclesUser",
-                    "type": 4,
-                    "config": 17,
-                    "excludeKernel": true
-                },
-                {
-                    "name": "CPUCyclesKernel",
-                    "type": 4,
-                    "config": 17,
-                    "excludeUser": true
-                },
-                {
-                    "name": "CPUInstructionUser",
-                    "type": 4,
-                    "config": 8,
-                    "excludeKernel": true
-                },
-                {
-                    "name": "CPUInstructionKernel",
-                    "type": 4,
-                    "config": 8,
-                    "excludeUser": true
-                }
-            ],
-        }
-        })";
-        Json::Value config;
-        std::stringstream(collectorConfig) >> config;
+        printf("[test 8]: Testing collect_scope for the perf collector...\n");
+        std::vector threads;
+
+        // Specification:
+        // https://github.com/ARM-software/patrace/blob/master/patrace/doc/manual.md#generating-cpu-load-with-perf-collector
+        std::string collectorConfig = R"(
+        {
+            "perf": {
+                "set": 4,
+                "inherit": 0,
+                "events": [
+                    {
+                        "name": "CPUCycleCount",
+                        "device": "armv8_pmuv3",
+                        "counterLen64bit": 1,
+                        "config": 17
+                    }
+                ]
+            }
+        })";
+        Json::Value config;
+        std::stringstream(collectorConfig) >> config;

-        threads.emplace_back(&Test8::test8_worker, this, "patrace-1", 1000, 0);
-        threads.emplace_back(&Test8::test8_worker, this, "patrace-2", 1000, 1);
-        threads.emplace_back(&Test8::test8_worker, this, "mali-1", 100, 2);
-        threads.emplace_back(&Test8::test8_worker, this, "mali-2", 100, 3);
+        threads.emplace_back(&Test8::test8_worker, this, "patrace-1", 1000, 0);
+        threads.emplace_back(&Test8::test8_worker, this, "patrace-2", 1000, 1);

-        c = new Collection(config);
-        c->initialize();
-        c->start();
-        test8_ready.store(true);
-        test8_cv.notify_all();
-        for (auto &t : threads)
-            t.join();
-        c->stop();
+        c = new Collection(config, true);
+        c->initialize();
+        c->start();
+        test8_ready.store(true);
+        test8_cv.notify_all();
+        for (auto &t : threads)
+            t.join();
+        c->stop();

-        Json::Value results = c->results();
-        Json::StyledWriter writer;
-        std::string data = writer.write(results);
-        printf("Results:\n%s", data.c_str());
-        c->writeJSON("results_collect_scope.json");
-    }
+        Json::Value results = c->results();
+        Json::StyledWriter writer;
+        std::string data = writer.write(results);
+        printf("Results:\n%s", data.c_str());
+        c->writeJSON("results_collect_scope.json");
+    }

 private:
-    void test8_worker(std::string const &thread_name, int ops, int scope_label_offset) {
-        prctl(PR_SET_NAME, (unsigned long)thread_name.c_str(), 0, 0, 0);
-        std::unique_lock lk(test8_mtx);
-        test8_cv.wait(lk, [this] { return test8_ready.load(); });
-        printf("Thread %s started.\n", thread_name.c_str());
+    void test8_worker(std::string const &thread_name, int ops, int scope_label_offset)
+    {
+        prctl(PR_SET_NAME, (unsigned long)thread_name.c_str(), 0, 0, 0);
+        std::unique_lock lk(test8_mtx);
+        test8_cv.wait(lk, [this]
+                      { return test8_ready.load(); });
+        printf("Thread %s started.\n", thread_name.c_str());

-        auto payload = [](int ops) {
-            int tmp = 1;
-            for (int i = 0; i < ops; i++)
-                tmp *= rand();
-        };
+        auto payload = [](int ops)
+        {
+            int tmp = 1;
+            for (int i = 0; i < ops; i++)
+                tmp *= rand();
+        };

-        if (strncmp(thread_name.c_str(), "patrace", 7) == 0) {
-            c->collect_scope_start(0 + scope_label_offset, COLLECT_REPLAY_THREADS);
-            payload(1000);
-            c->collect_scope_stop(0 + scope_label_offset, COLLECT_REPLAY_THREADS);
-        }
+        if (strncmp(thread_name.c_str(), "patrace", 7) == 0)
+        {
+            c->collect_scope_start(0 + scope_label_offset, COLLECT_REPLAY_THREADS, scope_label_offset + 1);
+            payload(1000);
+            c->collect_scope_stop(0 + scope_label_offset, COLLECT_REPLAY_THREADS, scope_label_offset + 1);
+        }

-        if (strncmp(thread_name.c_str(), "mali", 4) == 0) {
-            c->collect_scope_start(1 + scope_label_offset, COLLECT_BG_THREADS);
-            payload(1000);
-            c->collect_scope_stop(1 + scope_label_offset, COLLECT_BG_THREADS);
-        }
-        printf("Thread %s finished.\n", thread_name.c_str());
-    }
+        printf("Thread %s finished.\n", thread_name.c_str());
+    }

-    Collection *c;
-    std::atomic test8_ready;
-    std::condition_variable test8_cv;
-    std::mutex test8_mtx;
+    Collection *c;
+    std::atomic test8_ready;
+    std::condition_variable test8_cv;
+    std::mutex test8_mtx;
 };

 int main()