diff --git a/collectors/collector_utility.hpp b/collectors/collector_utility.hpp index ed14d04..6411afb 100644 --- a/collectors/collector_utility.hpp +++ b/collectors/collector_utility.hpp @@ -2,7 +2,6 @@ // This module includes utilities for writing collectors -#include #include #include "interface.hpp" @@ -16,7 +15,6 @@ std::string getMidgardInstrOutputPath(); // Hack to workaround strange missing support for std::to_string in Android #ifdef __ANDROID__ -#include #include template diff --git a/collectors/perf.cpp b/collectors/perf.cpp index 0c4894c..38b78de 100644 --- a/collectors/perf.cpp +++ b/collectors/perf.cpp @@ -1,23 +1,18 @@ #include "perf.hpp" #include -#include #include #include -#include +#include +#include +#include +#include #include -#include #if !defined(ANDROID) #include #else #include "perf_event.h" #endif -#include -#include -#include -#include -#include -#include static std::map> EVENTS = { {0, { {"CPUInstructionRetired", PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, false, false, hw_cnt_length::b32, false}, @@ -79,8 +74,22 @@ static inline uint64_t makeup_booker_ci_config(int nodetype, int eventid, int by return config; } -PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) : Collector(config, name) +PerfCollector::PerfCollector(const Json::Value& config, const std::string& name, bool enablePerapiPerf) : Collector(config, name) { +mEnablePerapiPerf = enablePerapiPerf; +// libcollector doesn't support any per api function on ANDROID platforms. +#if defined(ANDROID) || defined(__ANDROID__) + mEnablePerapiPerf = false; +#elif defined(__aarch64__) || defined(__arm__) + if (mEnablePerapiPerf) + { + volatile uint64_t pmcr_el0; + asm volatile("mrs %0, PMCR_EL0" : "=r"(pmcr_el0)); + pmu_counter_bits = ((pmcr_el0 & 0x80) == 0x80 ? 64 : 32); + DBG_LOG("pmu counter bits are: %u\n", pmu_counter_bits); + DBG_LOG("pmcr_el0 is: %lu\n", pmcr_el0); + } +#endif struct event leader = {"CPUCycleCount", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, false, false, hw_cnt_length::b32}; bool leaderOnce = true; @@ -88,6 +97,7 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) mInherit = mConfig.get("inherit", 1).asInt(); leader.inherited = mInherit; + leader.cspmu = false; leader.device = "single"; if ((0 <= mSet) && (mSet <= 3)) @@ -206,7 +216,7 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) } } - mAllThread = mConfig.get("allthread", true).asBool(); + mAllThread = mConfig.get("allthread", !mEnablePerapiPerf).asBool(); } static inline long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, @@ -249,9 +259,12 @@ static int add_event(const struct event &e, int tid, int cpu, int group = -1) bool PerfCollector::init() { create_perf_thread(); - for (perf_thread& t : mReplayThreads) { + if (mEnablePerapiPerf) + { + t.eventCtx.setEnablePerApi(); + } t.eventCtx.init(mEvents[t.device_name], t.tid, -1); } @@ -415,36 +428,7 @@ bool PerfCollector::collect(int64_t now) return true; } -bool PerfCollector::perf_counter_pause() { -#if defined(__aarch64__) - asm volatile("mrs %0, PMCNTENSET_EL0" : "=r" (PMCNTENSET_EL0_safe)); - // stop counters for arm64 - asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe)); - asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE)); -#elif defined(__arm__) - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(PMCNTENSET_EL0_safe)); - // stop counters for arm32 - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe)); - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE)); -#endif - return true; -} - -bool PerfCollector::perf_counter_resume() { -#if defined(__aarch64__) - // start counters for arm64 - asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe)); - asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe)); -#elif defined(__arm__) - // start counters for arm32 - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe)); - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe)); -#endif - return true; -} - - -bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) { +bool PerfCollector::collect_scope_start(uint16_t func_id, int32_t flags, int tid) { #if defined(__x86_64__) if (!attempt_collect_scope_x64) { attempt_collect_scope_x64 = true; @@ -452,44 +436,23 @@ bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t f "significant overhead to the kernel perf counter data.\n"); } #endif - if (!perf_counter_pause()) return false; if (!mCollecting) return false; struct snapshot snap; if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS) { - for (perf_thread &t : mReplayThreads) - { - t.eventCtx.collect_scope(now, func_id, false); - } - } - if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mBgThreads) + for (auto &thread: mReplayThreads) { - t.eventCtx.collect_scope(now, func_id, false); - } - } - if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mBookerThread) - { - t.eventCtx.collect_scope(now, func_id, false); - } - } - if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mCSPMUThreads) - { - t.eventCtx.collect_scope(now, func_id, false); - } + if (thread.tid == tid) + { + thread.eventCtx.collect_scope(func_id, false, get_pmu_bits()); + } + } } last_collect_scope_flags = flags; - if (!perf_counter_resume()) return false; return true; } -bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) { - if (!perf_counter_pause()) return false; +bool PerfCollector::collect_scope_stop(uint16_t func_id, int32_t flags, int tid) { if (!mCollecting) return false; if (last_collect_scope_flags != flags) { DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id); @@ -498,42 +461,17 @@ bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t fl struct snapshot snap_start, snap_stop; if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS) { - for (perf_thread &t : mReplayThreads) - { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - } - if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mBgThreads) - { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - } - if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mBookerThread) + for (auto &thread: mReplayThreads) { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - } - if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mCSPMUThreads) - { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); + if (thread.tid == tid) + { + snap_start = thread.eventCtx.last_snap; + snap_stop = thread.eventCtx.collect_scope(func_id, true, get_pmu_bits()); + thread.update_data_scope(func_id, snap_start, snap_stop); + } } } - if (!perf_counter_resume()) return false; - return false; + return true; } bool PerfCollector::postprocess(const std::vector& timing) @@ -672,6 +610,15 @@ bool event_context::deinit() return true; } + +#define F_BIT_0 ((uint32_t)0x00000001) +#define F_BIT_2 ((uint32_t)0x00000004) +#define F_BIT_3 ((uint32_t)0x00000008) +#define CINSTRP_ARMV8_PMCR_E ((unsigned long long)F_BIT_0) /* Enable all counters */ +#define CINSTRP_ARMV8_PMCR_C ((unsigned long long)F_BIT_2) /* Cycle counter reset */ +#define CINSTRP_ARMV8_PMCR_R ((unsigned long long)F_BIT_3) /* Cycle counter reset */ + + bool event_context::start() { if (ioctl(group, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) @@ -684,6 +631,24 @@ bool event_context::start() perror("ioctl PERF_EVENT_IOC_ENABLE"); return false; } + +#if !defined(ANDROID) && !defined(__ANDROID__) + if (getEnablePerApi()) + { + volatile uint64_t el0_access = 0; +#if defined(__aarch64__) + asm volatile("mrs %0, PMUSERENR_EL0" : "=r"(el0_access)); +#elif defined(__arm__) + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(el0_access)); +#endif + if ((el0_access & (CINSTRP_ARMV8_PMCR_E | CINSTRP_ARMV8_PMCR_C | CINSTRP_ARMV8_PMCR_R)) != (CINSTRP_ARMV8_PMCR_E | CINSTRP_ARMV8_PMCR_C | CINSTRP_ARMV8_PMCR_R)) + { + DBG_LOG("EL0 access to PMU is required! Please set the appropriate bits in PMUSERENR_EL0. Current settings: %08x\n", (uint32_t)el0_access); + exit(EXIT_FAILURE); + } + } +#endif + return true; } @@ -732,13 +697,25 @@ struct snapshot event_context::collect(int64_t now) return snap; } -struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping) +struct snapshot event_context::collect_scope(uint16_t func_id, bool stopping, uint8_t pmu_bits) { if (stopping && last_snap_func_id != func_id) { DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id); + exit(EXIT_FAILURE); } struct snapshot snap; +#if defined(__aarch64__) + if (pmu_bits == 32) + { + asm volatile("mrs %0, PMCCNTR_EL0" : "=r"(snap.values[0])); + } + else + { + asm volatile("mrs %0, PMEVCNTR2_EL0" : "=r"(snap.values[0])); + } +#else if (read(group, &snap, sizeof(snap)) == -1) perror("read"); +#endif if (stopping) { last_snap_func_id = -1; } else { diff --git a/collectors/perf.hpp b/collectors/perf.hpp index 3d7496d..c0c2ba2 100644 --- a/collectors/perf.hpp +++ b/collectors/perf.hpp @@ -1,11 +1,13 @@ #pragma once #include "collector_utility.hpp" -#include "interface.hpp" +// #include "interface.hpp" #include #include #include #include +#include +#include enum hw_cnt_length { @@ -50,8 +52,8 @@ enum collect_scope_flags: int32_t struct snapshot { snapshot() : size(0) {} - uint64_t size; - long long values[8] = {0}; + unsigned long size; + unsigned long values[8] = {0}; }; struct event { @@ -72,6 +74,7 @@ class event_context public: event_context() { + mEnablePerapiPerf = false; group = -1; last_snap_func_id = -1; } @@ -82,14 +85,18 @@ class event_context bool start(); struct snapshot collect(int64_t now); - struct snapshot collect_scope(int64_t now, uint16_t func_id, bool stopping); + struct snapshot collect_scope(uint16_t func_id, bool stopping, uint8_t pmu_bits); // If not -1, then we are in the middle of collect_scope_start/stop. uint16_t last_snap_func_id; struct snapshot last_snap; + int group_core; bool stop(); bool deinit(); + int getGroup() { return group; }; + bool getEnablePerApi() { return mEnablePerapiPerf; }; + void setEnablePerApi() { mEnablePerapiPerf = true; }; inline void update_data(const struct snapshot &snap, CollectorValueResults &result) { @@ -97,26 +104,24 @@ class event_context result[mCounters[i].name].push_back(snap.values[i]); } - inline void update_data_scope(uint16_t func_id, bool is_calling, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result) + inline void update_data_scope(uint16_t func_id, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result) { if (!mValueResults) mValueResults = &result; - long long diff_acc = 0; + uint64_t diff_acc = 0; for (unsigned int i = 0; i < mCounters.size(); i++) { - long long diff = snap_end.values[i] - snap_start.values[i]; + uint64_t diff = snap_end.values[i] - snap_start.values[i]; if (mCounters[i].scope_values.size() <= func_id) { mCounters[i].scope_values.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0); } mCounters[i].scope_values[func_id] += diff; diff_acc += diff; } - if (diff_acc > 0 && is_calling) { + if (diff_acc > 0) { if (scope_num_calls.size() <= func_id) { scope_num_calls.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0); } scope_num_calls[func_id]++; - } - if (diff_acc > 0) { - if (scope_num_with_perf.size() <= func_id) { + if (scope_num_with_perf.size() <= func_id) { scope_num_with_perf.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0); } scope_num_with_perf[func_id]++; @@ -129,7 +134,7 @@ class event_context std::string name; int fd; // Record accumulated values for update_data_scope, where the index of the vector is the uint16_t func_id. - std::vector scope_values; + std::vector scope_values; counter() { scope_values.reserve(512); @@ -144,12 +149,13 @@ class event_context // Record number of scope calls that actually triggered the collect_scope (happen in 1 thread that calls the collection method) std::vector scope_num_calls; CollectorValueResults *mValueResults = nullptr; + bool mEnablePerapiPerf; }; class PerfCollector : public Collector { public: - PerfCollector(const Json::Value& config, const std::string& name); + PerfCollector(const Json::Value& config, const std::string& name, bool enablePerapiPerf = false); virtual bool init() override; virtual bool deinit() override; @@ -162,11 +168,11 @@ class PerfCollector : public Collector virtual bool postprocess(const std::vector& timing) override; virtual void summarize() override; + uint8_t get_pmu_bits() { return pmu_counter_bits; } + /// Collector functions for perapi perf instrumentations. - virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) override; - virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) override; - bool perf_counter_pause(); - bool perf_counter_resume(); + virtual bool collect_scope_start(uint16_t func_id, int32_t flags, int tid) override; + virtual bool collect_scope_stop(uint16_t func_id, int32_t flags, int tid) override; private: void create_perf_thread(); @@ -176,6 +182,8 @@ class PerfCollector : public Collector int mSet = -1; int mInherit = 1; bool mAllThread = true; + bool mEnablePerapiPerf = false; + uint8_t pmu_counter_bits; std::vector mBookerEvents; std::map> mEvents; std::map> mCSPMUEvents; @@ -197,8 +205,7 @@ class PerfCollector : public Collector void update_data_scope(uint16_t func_id, struct snapshot& snap_start, struct snapshot& snap_end) { - pid_t cur_tid = syscall(SYS_gettid); - eventCtx.update_data_scope(func_id, cur_tid == tid, snap_start, snap_end, mResultsPerThread); + eventCtx.update_data_scope(func_id, snap_start, snap_end, mResultsPerThread); } void clear() @@ -217,16 +224,16 @@ class PerfCollector : public Collector v[pair.first] = Json::arrayValue; unsigned int index = 0; - int64_t total = 0; + uint64_t total = 0; for (const CollectorValue& cv : pair.second.data()) { - int64_t s = cv.i64; - if (need_sum) s += value[pair.first][index++].asInt64(); - v[pair.first].append((Json::Value::Int64)s); + uint64_t s = cv.u64; + if (need_sum) s += value[pair.first][index++].asUInt64(); + v[pair.first].append((Json::Value::UInt64)s); total += s; } value[pair.first] = v[pair.first]; - value["SUM"][pair.first] = (Json::Value::Int64)total; + value["SUM"][pair.first] = (Json::Value::UInt64)total; } } diff --git a/interface.cpp b/interface.cpp index ca26000..a91c696 100644 --- a/interface.cpp +++ b/interface.cpp @@ -4,9 +4,8 @@ #include #include #include -#include #include -#include +#include #ifndef __APPLE__ #include "collectors/perf.hpp" @@ -188,7 +187,7 @@ SysfsCollector::~SysfsCollector() // ---------- COLLECTION ---------- -Collection::Collection(const std::string& config_str) +Collection::Collection(const std::string& config_str, bool enablePerapiPerf) : mEnablePerapiPerf(enablePerapiPerf) { Json::Value jsonConfig; @@ -204,56 +203,66 @@ Collection::Collection(const std::string& config_str) init_from_json(jsonConfig); } -Collection::Collection(const Json::Value& config) : mConfig(config) +Collection::Collection(const Json::Value& config, bool enablePerapiPerf) : mEnablePerapiPerf(enablePerapiPerf), mConfig(config) { init_from_json(config); } void Collection::init_from_json(const Json::Value& config) { - #ifndef __APPLE__ - mCollectors.push_back(new PerfCollector(config, "perf")); - mCollectors.push_back(new SysfsCollector(config, "battery_temperature", - { "/sys/class/power_supply/battery/temp", - "/sys/devices/platform/android-battery/power_supply/android-battery/temp", // Nexus 10 - "/sys/class/power_supply/battery/batt_temp" })); // teclast tpad-1 - mCollectors.push_back(new CPUFreqCollector(config, "cpufreq")); - mCollectors.push_back(new SysfsCollector(config, "memfreq", - { "/sys/class/devfreq/exynos5-busfreq-mif/cur_freq", // note 3 - "/sys/class/devfreq/exynos5-devfreq-mif/cur_freq", // note 4 - "/sys/devices/17000010.devfreq_mif/devfreq/17000010.devfreq_mif/cur_freq" })); // Mali S7 - mCollectors.push_back(new SysfsCollector(config, "memfreqdisplay", - { "/sys/devices/17000030.devfreq_disp/devfreq/17000030.devfreq_disp/cur_freq" })); // Mali S7 - mCollectors.push_back(new SysfsCollector(config, "memfreqint", - { "/sys/class/devfreq/exynos5-busfreq-int/cur_freq", // note 3 - "/sys/class/devfreq/exynos5-devfreq-int/cur_freq", // note 4 - "/sys/devices/17000020.devfreq_int/devfreq/17000020.devfreq_int/cur_freq" })); // Mali S7 - mCollectors.push_back(new SysfsCollector(config, "gpu_active_time", - { "/sys/devices/platform/mali.0/power/runtime_active_time", // mali - "/sys/devices/platform/pvrsrvkm.0/power/runtime_active_time", // power-vr - "/sys/devices/virtual/graphics/fb0/power/runtime_active_time" }, // adreno - true)); // accumulative value - mCollectors.push_back(new SysfsCollector(config, "gpu_suspended_time", - { "/sys/devices/platform/mali.0/power/runtime_suspended_time", // mali - "/sys/devices/platform/pvrsrvkm.0/power/runtime_suspended_time", // power-vr - "/sys/devices/virtual/graphics/fb0/power/runtime_suspended_time" }, // adreno (but only for framebuffer zero!) - true)); // accumulative value - mCollectors.push_back(new SysfsCollector(config, "cpufreqtrans", - { "/sys/devices/system/cpu/cpu0/cpufreq/stats/total_trans" }, - true)); // accumulative value - if (config.isMember("debug") && config["debug"].asBool()) mDebug = true; +#ifndef __APPLE__ + if (mEnablePerapiPerf) + { + mCollectors.push_back(new PerfCollector(config, "perf", true)); + } + else + { + mCollectors.push_back(new PerfCollector(config, "perf")); + mCollectors.push_back(new SysfsCollector(config, "battery_temperature", + { "/sys/class/power_supply/battery/temp", + "/sys/devices/platform/android-battery/power_supply/android-battery/temp", // Nexus 10 + "/sys/class/power_supply/battery/batt_temp" })); // teclast tpad-1 + mCollectors.push_back(new CPUFreqCollector(config, "cpufreq")); + mCollectors.push_back(new SysfsCollector(config, "memfreq", + { "/sys/class/devfreq/exynos5-busfreq-mif/cur_freq", // note 3 + "/sys/class/devfreq/exynos5-devfreq-mif/cur_freq", // note 4 + "/sys/devices/17000010.devfreq_mif/devfreq/17000010.devfreq_mif/cur_freq" })); // Mali S7 + mCollectors.push_back(new SysfsCollector(config, "memfreqdisplay", + { "/sys/devices/17000030.devfreq_disp/devfreq/17000030.devfreq_disp/cur_freq" })); // Mali S7 + mCollectors.push_back(new SysfsCollector(config, "memfreqint", + { "/sys/class/devfreq/exynos5-busfreq-int/cur_freq", // note 3 + "/sys/class/devfreq/exynos5-devfreq-int/cur_freq", // note 4 + "/sys/devices/17000020.devfreq_int/devfreq/17000020.devfreq_int/cur_freq" })); // Mali S7 + mCollectors.push_back(new SysfsCollector(config, "gpu_active_time", + { "/sys/devices/platform/mali.0/power/runtime_active_time", // mali + "/sys/devices/platform/pvrsrvkm.0/power/runtime_active_time", // power-vr + "/sys/devices/virtual/graphics/fb0/power/runtime_active_time" }, // adreno + true)); // accumulative value + mCollectors.push_back(new SysfsCollector(config, "gpu_suspended_time", + { "/sys/devices/platform/mali.0/power/runtime_suspended_time", // mali + "/sys/devices/platform/pvrsrvkm.0/power/runtime_suspended_time", // power-vr + "/sys/devices/virtual/graphics/fb0/power/runtime_suspended_time" }, // adreno (but only for framebuffer zero!) + true)); // accumulative value + mCollectors.push_back(new SysfsCollector(config, "cpufreqtrans", + { "/sys/devices/system/cpu/cpu0/cpufreq/stats/total_trans" }, + true)); // accumulative value + if (config.isMember("debug") && config["debug"].asBool()) mDebug = true; #if defined(ANDROID) || defined(__ANDROID__) - mCollectors.push_back(new StreamlineCollector(config, "streamline")); + mCollectors.push_back(new StreamlineCollector(config, "streamline")); #endif - mCollectors.push_back(new MemoryCollector(config, "memory")); - mCollectors.push_back(new CPUTemperatureCollector(config, "cputemp")); - mCollectors.push_back(new GPUFreqCollector(config, "gpufreq")); - mCollectors.push_back(new PowerDataCollector(config, "power")); - mCollectors.push_back(new FerretCollector(config, "ferret")); - mCollectors.push_back(new ProcFSStatCollector(config, "procfs")); - mCollectors.push_back(new MaliCounterCollector(config, "malicounters")); + mCollectors.push_back(new MemoryCollector(config, "memory")); + mCollectors.push_back(new CPUTemperatureCollector(config, "cputemp")); + mCollectors.push_back(new GPUFreqCollector(config, "gpufreq")); + mCollectors.push_back(new PowerDataCollector(config, "power")); + mCollectors.push_back(new FerretCollector(config, "ferret")); + mCollectors.push_back(new ProcFSStatCollector(config, "procfs")); + mCollectors.push_back(new MaliCounterCollector(config, "malicounters")); + // Various specializations + mCollectorMap["battery_temperature"]->doubleTransform(0.1); // divide by 10 and store as float + } #endif - mCollectors.push_back(new RusageCollector(config, "rusage")); + if (!mEnablePerapiPerf) + mCollectors.push_back(new RusageCollector(config, "rusage")); for (Collector* c : mCollectors) { @@ -264,9 +273,6 @@ void Collection::init_from_json(const Json::Value& config) } mCollectorMap[c->name()] = c; } - - // Various specializations - mCollectorMap["battery_temperature"]->doubleTransform(0.1); // divide by 10 and store as float } Collection::~Collection() @@ -388,7 +394,14 @@ void Collection::start(const std::vector& headers) if (c->isThreaded()) { c->finished = false; - c->thread = std::thread(&Collector::loop, c); + if (mEnablePerapiPerf) + { + c->thread = std::thread(std::function()); + } + else + { + c->thread = std::thread(&Collector::loop, c); + } int failure = pthread_setname_np( c->thread.native_handle(), c->name().c_str()); @@ -450,29 +463,26 @@ void Collection::collect(std::vector custom) } } -void Collection::collect_scope_start(uint16_t label, int32_t flags) { +void Collection::collect_scope_start(uint16_t label, int32_t flags, int tid) { // Not getting the current time as it introduces huge kernel cycle overhead to the perf collector. - const int64_t now = 0; - // mScopeStartTime = now; for (Collector* c : mRunning) { if (!c->isThreaded()) { - c->collect_scope_start(now, label, flags); + c->collect_scope_start(label, flags, tid); } } } -void Collection::collect_scope_stop(uint16_t label, int32_t flags) { +void Collection::collect_scope_stop(uint16_t label, int32_t flags, int tid) { // Not getting the current time as it introduces huge kernel cycle overhead to the perf collector. - const int64_t now = 0; // Timing is not enabled to avoid extreme large json outputs. // mTiming.push_back(now - mScopeStartTime); for (Collector* c : mRunning) { if (!c->isThreaded()) { - c->collect_scope_stop(now, label, flags); + c->collect_scope_stop(label, flags, tid); } } } diff --git a/interface.hpp b/interface.hpp index dce31cb..d7d6697 100644 --- a/interface.hpp +++ b/interface.hpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -90,8 +89,8 @@ class Collector virtual bool stop() { mCollecting = false; return true; } virtual bool postprocess(const std::vector& timing); virtual bool collect( int64_t ) = 0; - virtual bool collect_scope_start( int64_t now, uint16_t func_id, int flags ) {return true; }; - virtual bool collect_scope_stop( int64_t now, uint16_t func_id, int flags ) { return true; }; + virtual bool collect_scope_start( uint16_t func_id, int flags, int tid) {return true; }; + virtual bool collect_scope_stop( uint16_t func_id, int flags, int tid) { return true; }; virtual bool collecting() const { return mCollecting; } virtual const std::string& name() const { return mName; } virtual bool available() = 0; @@ -187,8 +186,8 @@ class SysfsCollector : public Collector class Collection { public: - Collection(const std::string& config_str); - Collection(const Json::Value& config); + Collection(const std::string& config_str, bool enablePerapiPerf = false); + Collection(const Json::Value& config, bool enablePerapiPerf = false); ~Collection(); /// Return a list of functional collectors for this platform. @@ -256,11 +255,11 @@ class Collection /// Sample periodical data for per API instrumentation. Call this method before the payload /// execution. Currently only used for perf collector. - void collect_scope_start(uint16_t label, int32_t flags); + void collect_scope_start(uint16_t label, int32_t flags, int tid); /// Sample periodical data for per API instrumentation. Call this method after the payload /// execution. Currently only used for perf collector. - void collect_scope_stop(uint16_t label, int32_t flags); + void collect_scope_stop(uint16_t label, int32_t flags, int tid); /// Get the results as JSON Json::Value results(); @@ -271,6 +270,7 @@ class Collection void init_from_json(const Json::Value& config); bool running = false; + bool mEnablePerapiPerf = false; Json::Value mConfig; std::vector mCollectors; std::vector mRunning; diff --git a/test.cpp b/test.cpp index dc4220d..5c56a2d 100644 --- a/test.cpp +++ b/test.cpp @@ -264,108 +264,90 @@ static void test7() c.writeCSV("excel.csv"); } -class Test8 { +class Test8 +{ public: + Test8() : test8_ready(false) {} - Test8() : test8_ready(false) {} - - ~Test8() { - delete c; - } - - void run() { - printf("[test 8]: Testing collect_scope for the perf collector...\n"); - std::vector threads; + ~Test8() + { + delete c; + } - // Specification: - // https://github.com/ARM-software/patrace/blob/master/patrace/doc/manual.md#generating-cpu-load-with-perf-collector - std::string collectorConfig = R"( + void run() { - "perf": { - "set": 4, - "event": [ - { - "name": "CPUCyclesUser", - "type": 4, - "config": 17, - "excludeKernel": true - }, - { - "name": "CPUCyclesKernel", - "type": 4, - "config": 17, - "excludeUser": true - }, - { - "name": "CPUInstructionUser", - "type": 4, - "config": 8, - "excludeKernel": true - }, - { - "name": "CPUInstructionKernel", - "type": 4, - "config": 8, - "excludeUser": true - } - ], - } - })"; - Json::Value config; - std::stringstream(collectorConfig) >> config; + printf("[test 8]: Testing collect_scope for the perf collector...\n"); + std::vector threads; + + // Specification: + // https://github.com/ARM-software/patrace/blob/master/patrace/doc/manual.md#generating-cpu-load-with-perf-collector + std::string collectorConfig = R"( + { + "perf": { + "set": 4, + "inherit": 0, + "events": [ + { + "name": "CPUCycleCount", + "device": "armv8_pmuv3", + "counterLen64bit": 1, + "config": 17 + } + ] + } + })"; + Json::Value config; + std::stringstream(collectorConfig) >> config; - threads.emplace_back(&Test8::test8_worker, this, "patrace-1", 1000, 0); - threads.emplace_back(&Test8::test8_worker, this, "patrace-2", 1000, 1); - threads.emplace_back(&Test8::test8_worker, this, "mali-1", 100, 2); - threads.emplace_back(&Test8::test8_worker, this, "mali-2", 100, 3); + threads.emplace_back(&Test8::test8_worker, this, "patrace-1", 1000, 0); + threads.emplace_back(&Test8::test8_worker, this, "patrace-2", 1000, 1); - c = new Collection(config); - c->initialize(); - c->start(); - test8_ready.store(true); - test8_cv.notify_all(); - for (auto &t : threads) - t.join(); - c->stop(); + c = new Collection(config, true); + c->initialize(); + c->start(); + test8_ready.store(true); + test8_cv.notify_all(); + for (auto &t : threads) + t.join(); + c->stop(); - Json::Value results = c->results(); - Json::StyledWriter writer; - std::string data = writer.write(results); - printf("Results:\n%s", data.c_str()); - c->writeJSON("results_collect_scope.json"); - } + Json::Value results = c->results(); + Json::StyledWriter writer; + std::string data = writer.write(results); + printf("Results:\n%s", data.c_str()); + c->writeJSON("results_collect_scope.json"); + } private: - void test8_worker(std::string const &thread_name, int ops, int scope_label_offset) { - prctl(PR_SET_NAME, (unsigned long)thread_name.c_str(), 0, 0, 0); - std::unique_lock lk(test8_mtx); - test8_cv.wait(lk, [this] { return test8_ready.load(); }); - printf("Thread %s started.\n", thread_name.c_str()); + void test8_worker(std::string const &thread_name, int ops, int scope_label_offset) + { + prctl(PR_SET_NAME, (unsigned long)thread_name.c_str(), 0, 0, 0); + std::unique_lock lk(test8_mtx); + test8_cv.wait(lk, [this] + { return test8_ready.load(); }); + printf("Thread %s started.\n", thread_name.c_str()); - auto payload = [](int ops) { - int tmp = 1; - for (int i = 0; i < ops; i++) - tmp *= rand(); - }; + auto payload = [](int ops) + { + int tmp = 1; + for (int i = 0; i < ops; i++) + tmp *= rand(); + }; - if (strncmp(thread_name.c_str(), "patrace", 7) == 0) { - c->collect_scope_start(0 + scope_label_offset, COLLECT_REPLAY_THREADS); - payload(1000); - c->collect_scope_stop(0 + scope_label_offset, COLLECT_REPLAY_THREADS); - } + if (strncmp(thread_name.c_str(), "patrace", 7) == 0) + { + c->collect_scope_start(0 + scope_label_offset, COLLECT_REPLAY_THREADS, scope_label_offset + 1); + payload(1000); + c->collect_scope_stop(0 + scope_label_offset, COLLECT_REPLAY_THREADS, scope_label_offset + 1); + } - if (strncmp(thread_name.c_str(), "mali", 4) == 0) { - c->collect_scope_start(1 + scope_label_offset, COLLECT_BG_THREADS); - payload(1000); - c->collect_scope_stop(1 + scope_label_offset, COLLECT_BG_THREADS); - } - printf("Thread %s finished.\n", thread_name.c_str()); - } + printf("Thread %s finished.\n", thread_name.c_str()); + } - Collection *c; - std::atomic test8_ready; - std::condition_variable test8_cv; - std::mutex test8_mtx; + Collection *c; + std::atomic test8_ready; + std::condition_variable test8_cv; + std::mutex test8_mtx; }; int main()