From 848ad731a7f8af039752d6762c3f08e9aa3f7a22 Mon Sep 17 00:00:00 2001 From: Ke Xiang Date: Wed, 18 Dec 2024 10:34:04 +0100 Subject: [PATCH] Fix multiple pmu support 1. unify the single and multiple pmu event struct, as well as the thread struct. 2. remove the default group event from multiple pmu --- collectors/perf.cpp | 223 ++++++++++++++++---------------------------- collectors/perf.hpp | 18 ++-- 2 files changed, 88 insertions(+), 153 deletions(-) diff --git a/collectors/perf.cpp b/collectors/perf.cpp index 47633b9..0c4894c 100644 --- a/collectors/perf.cpp +++ b/collectors/perf.cpp @@ -81,19 +81,25 @@ static inline uint64_t makeup_booker_ci_config(int nodetype, int eventid, int by PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) : Collector(config, name) { - struct event leader = {"CPUCycleCount", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES}; + struct event leader = {"CPUCycleCount", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, false, false, hw_cnt_length::b32}; + bool leaderOnce = true; mSet = mConfig.get("set", -1).asInt(); mInherit = mConfig.get("inherit", 1).asInt(); leader.inherited = mInherit; - mEvents.push_back(leader); + leader.device = "single"; if ((0 <= mSet) && (mSet <= 3)) { DBG_LOG("Using reserved CPU counter set number %d, this will fail on non-ARM CPU's except set 0.\n", mSet); - for (const struct event& e : EVENTS[mSet]) - mEvents.push_back(e); + mEvents.emplace(leader.device, std::vector{leader}); + for (struct event& e : EVENTS[mSet]) + { + e.device = leader.device; + e.inherited = mInherit; + mEvents[leader.device].push_back(e); + } } else if (mConfig.isMember("event")) { @@ -106,7 +112,7 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) if ( !item.isMember("name") || (!item.isMember("type")&&!item.isMember("device")) || !item.isMember("config") ) { - DBG_LOG("perf event does not specify name, tpye or config, skip this event!\n"); + DBG_LOG("perf event does not specify name, config, tpye or device, skip this event!\n"); continue; } e.name = item.get("name", "").asString(); @@ -119,33 +125,8 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) e.device = item.get("device", "").asString(); e.inherited = mInherit; - if (e.booker_ci) - { // booker-ci counter - int eventid = item.get("config", 0).asInt(); - std::string type = item.get("nodetype", " ").asString(); - int nodetype = NodeTypes[type]; - int bynodeid = item.get("bynodeid", 0).asInt(); - - if (bynodeid) - { - Json::Value nodeIdArray = item["nodeid"]; - for (Json::ArrayIndex idx = 0; idx < nodeIdArray.size(); idx++) - { - struct event nodeEvent = e; - uint64_t nodeid = nodeIdArray[idx].asUInt64(); - nodeEvent.config = makeup_booker_ci_config(nodetype, eventid, 1, nodeid); - nodeEvent.name = item.get("name", "").asString() + "_node" + _to_string(nodeid); - mBookerEvents.push_back(nodeEvent); - } - } - else - { - e.config = makeup_booker_ci_config(nodetype, eventid); - mBookerEvents.push_back(e); - } - } - else if(e.device!="") - {//for d9000, CPU cores on different PMU + if(e.device!="") + { //for d9000, CPU cores on different PMU e.config = item.get("config", 0).asUInt64(); auto type_string = e.device; @@ -159,36 +140,68 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) if (e.cspmu) { e.name = e.device+"_"+e.name; - if (mCSPMUEvents.count(e.type)) + if (mCSPMUEvents.count(e.device)) { - mCSPMUEvents[e.type].push_back(e); + mCSPMUEvents[e.device].push_back(e); } else { - mCSPMUEvents.emplace(e.type,std::vector{e}); + mCSPMUEvents.emplace(e.device, std::vector{e}); } } else { - if (mMultiPMUEvents.count(e.type)) + if (mEvents.count(e.device)) { - mMultiPMUEvents[e.type].push_back(e); + mEvents[e.device].push_back(e); } else { - mMultiPMUEvents.emplace(e.type,std::vector{e}); + mEvents.emplace(e.device, std::vector{e}); } } } else { - DBG_LOG("Error: wrong device name, could not find correspoding event type, event skipped\n"); + DBG_LOG("Error: wrong device name %s, could not find correspoding event type, event skipped\n", event_type_filename.c_str()); + } + } + else if (e.booker_ci) + { // booker-ci counter + int eventid = item.get("config", 0).asInt(); + std::string type = item.get("nodetype", " ").asString(); + int nodetype = NodeTypes[type]; + int bynodeid = item.get("bynodeid", 0).asInt(); + + if (bynodeid) + { + Json::Value nodeIdArray = item["nodeid"]; + for (Json::ArrayIndex idx = 0; idx < nodeIdArray.size(); idx++) + { + struct event nodeEvent = e; + uint64_t nodeid = nodeIdArray[idx].asUInt64(); + nodeEvent.config = makeup_booker_ci_config(nodetype, eventid, 1, nodeid); + nodeEvent.name = item.get("name", "").asString() + "_node" + _to_string(nodeid); + mBookerEvents.push_back(nodeEvent); + } + } + else + { + e.config = makeup_booker_ci_config(nodetype, eventid); + mBookerEvents.push_back(e); } } else { + if (leaderOnce) + { + leader.len = e.len; + mEvents.emplace(leader.device, std::vector{leader}); + leaderOnce = false; + } + e.device = leader.device; e.config = item.get("config", 0).asUInt64(); - mEvents.push_back(e); + mEvents[e.device].push_back(e); } } } @@ -235,40 +248,20 @@ static int add_event(const struct event &e, int tid, int cpu, int group = -1) bool PerfCollector::init() { - if (mEvents.size() == 0) - { - DBG_LOG("None perf event counter.\n"); - return false; - } - create_perf_thread(); - int i=0; + for (perf_thread& t : mReplayThreads) { - t.eventCtx.init(mEvents, t.tid, -1); - for (auto& et : mMultiPMUEvents) - { - mMultiPMUThreads[i].eventCtx.init(et.second, mMultiPMUThreads[i].tid, -1); - i++; - } + t.eventCtx.init(mEvents[t.device_name], t.tid, -1); } for (perf_thread& t : mBgThreads) { - t.eventCtx.init(mEvents, t.tid, -1); - for (auto& et : mMultiPMUEvents) - { - mMultiPMUThreads[i].eventCtx.init(et.second, mMultiPMUThreads[i].tid, -1); - i++; - } + t.eventCtx.init(mEvents[t.device_name], t.tid, -1); } - int n = 0; - for (auto& iter : mCSPMUEvents) - { - mCSPMUThreads[n].eventCtx.init(iter.second, -1, 0); - n++; - } + for (perf_thread& t: mCSPMUThreads) + t.eventCtx.init(mCSPMUEvents[t.device_name], -1, 0); for (perf_thread& t : mBookerThread) t.eventCtx.init(mBookerEvents, -1, 0); @@ -290,12 +283,6 @@ bool PerfCollector::deinit() t.clear(); } - for (perf_thread& t : mMultiPMUThreads) - { - t.eventCtx.deinit(); - t.clear(); - } - for (perf_thread& t : mBookerThread) { t.eventCtx.deinit(); @@ -308,14 +295,14 @@ bool PerfCollector::deinit() t.clear(); } - mEvents.clear(); mBookerEvents.clear(); - for (auto& et : mMultiPMUEvents) et.second.clear(); + for (auto& et : mEvents) et.second.clear(); for (auto& et : mCSPMUEvents) et.second.clear(); + mEvents.clear(); + mCSPMUEvents.clear(); mReplayThreads.clear(); mBgThreads.clear(); - mMultiPMUThreads.clear(); mBookerThread.clear(); mCSPMUThreads.clear(); mClocks.clear(); @@ -338,20 +325,16 @@ bool PerfCollector::start() if (!t.eventCtx.start()) return false; - for (perf_thread& t : mMultiPMUThreads) - if (!t.eventCtx.start()) - return false; - for (perf_thread& t: mBookerThread) if (!t.eventCtx.start()) return false; for (perf_thread& t: mCSPMUThreads) - { - if (!t.eventCtx.start()) - return false; - mClocks.emplace(t.device_name, std::vector{}); - } + { + if (!t.eventCtx.start()) + return false; + mClocks.emplace(t.device_name, std::vector{}); + } mCollecting = true; return true; } @@ -381,11 +364,6 @@ bool PerfCollector::stop() t.eventCtx.stop(); } - for (perf_thread& t : mMultiPMUThreads) - { - t.eventCtx.stop(); - } - for (perf_thread& t : mBookerThread) { t.eventCtx.stop(); @@ -418,12 +396,6 @@ bool PerfCollector::collect(int64_t now) t.update_data(snap); } - for (perf_thread& t : mMultiPMUThreads) - { - snap = t.eventCtx.collect(now); - t.update_data(snap); - } - for (perf_thread& t : mBookerThread) { snap = t.eventCtx.collect(now); @@ -497,13 +469,6 @@ bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t f t.eventCtx.collect_scope(now, func_id, false); } } - if (flags & COLLECT_MULTI_PMU_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mMultiPMUThreads) - { - t.eventCtx.collect_scope(now, func_id, false); - } - } if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS) { for (perf_thread &t : mBookerThread) @@ -549,15 +514,6 @@ bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t fl t.update_data_scope(func_id, snap_start, snap_stop); } } - if (flags & COLLECT_MULTI_PMU_THREADS || flags & COLLECT_ALL_THREADS) - { - for (perf_thread &t : mMultiPMUThreads) - { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - } if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS) { for (perf_thread &t : mBookerThread) @@ -589,24 +545,22 @@ bool PerfCollector::postprocess(const std::vector& timing) Json::Value replayValue; replayValue["CCthread"] = "replayMainThreads"; - int i=0; + for (perf_thread& t : mReplayThreads) { Json::Value perf_threadValue; perf_threadValue["CCthread"] = t.name.c_str(); + if (strcmp(t.device_name.c_str(), "single")) // excluding the default "single" since it's a fake deviceName + perf_threadValue["device"] = t.device_name.c_str(); t.postprocess(perf_threadValue); t.postprocess(replayValue); - for (unsigned int j =0; j& timing) { Json::Value perf_threadValue; perf_threadValue["CCthread"] = t.name.c_str(); + if (strcmp(t.device_name.c_str(), "single")) + perf_threadValue["device"] = t.device_name.c_str(); t.postprocess(perf_threadValue); t.postprocess(bgValue); t.postprocess(allValue); - for (unsigned int j =0; j &events, int tid, int cpu) +bool event_context::init(const std::vector &events, int tid, int cpu) { struct counter grp; grp.fd = group = add_event(events[0], tid, cpu); @@ -827,12 +772,9 @@ void PerfCollector::create_perf_thread() if(!mCSPMUEvents.empty()) { - int i=0; - for (auto pair : mCSPMUEvents) + for (const auto &pair : mCSPMUEvents) { - mCSPMUThreads.emplace_back(getpid(), current_pName); - mCSPMUThreads[i].device_name = pair.second[0].device; - i++; + mCSPMUThreads.emplace_back(getpid(), current_pName, pair.first); } return; } @@ -850,19 +792,14 @@ void PerfCollector::create_perf_thread() std::string thread_name = getThreadName(tid); if (!strncmp(thread_name.c_str(), "patrace-", 8)) { - mReplayThreads.emplace_back(tid, thread_name); //each group of MultiPMUEvents have a thread - for (unsigned int i =0; i &events, int tid, int cpu); + bool init(const std::vector &events, const int tid, const int cpu); bool start(); struct snapshot collect(int64_t now); @@ -91,7 +91,7 @@ class event_context bool stop(); bool deinit(); - inline void update_data(struct snapshot &snap, CollectorValueResults &result) + inline void update_data(const struct snapshot &snap, CollectorValueResults &result) { for (unsigned int i = 0; i < mCounters.size(); i++) result[mCounters[i].name].push_back(snap.values[i]); @@ -163,8 +163,8 @@ class PerfCollector : public Collector virtual void summarize() override; /// Collector functions for perapi perf instrumentations. - virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags); - virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags); + virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) override; + virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) override; bool perf_counter_pause(); bool perf_counter_resume(); @@ -176,10 +176,9 @@ class PerfCollector : public Collector int mSet = -1; int mInherit = 1; bool mAllThread = true; - std::vector mEvents; std::vector mBookerEvents; - std::map> mMultiPMUEvents; - std::map> mCSPMUEvents; + std::map> mEvents; + std::map> mCSPMUEvents; std::map> mClocks; // device_name -> clock_vector int last_collect_scope_flags = 0; @@ -189,7 +188,7 @@ class PerfCollector : public Collector struct perf_thread { - perf_thread(const int tid, const std::string &name): tid(tid), name(name), eventCtx{} {} + perf_thread(const int tid, const std::string &name, const std::string &device_name=""): tid(tid), name(name), device_name(device_name), eventCtx{} {} void update_data(struct snapshot& snap) { @@ -241,14 +240,13 @@ class PerfCollector : public Collector const int tid; const std::string name; + const std::string device_name; event_context eventCtx; CollectorValueResults mResultsPerThread; - std::string device_name; }; std::vector mReplayThreads; std::vector mBgThreads; std::vector mBookerThread; - std::vector mMultiPMUThreads; std::vector mCSPMUThreads; };