Skip to content

Commit

Permalink
Use inline asm for more accurate perf collection.
Browse files Browse the repository at this point in the history
Added flag parameter in collect_scope_start/stop to control what threads
to collect.

Signed-off-by: Bill Chen <bill.chen@arm.com>
  • Loading branch information
jtchen2k committed Jul 31, 2024
1 parent 5acc385 commit 60b3aa7
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 55 deletions.
141 changes: 104 additions & 37 deletions collectors/perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,59 +443,99 @@ bool PerfCollector::collect(int64_t now)
return true;
}

bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id) {
bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) {
if (!mCollecting) return false;
struct snapshot snap;
for (perf_thread& t : mReplayThreads)
if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS)
{
t.eventCtx.collect_scope(now, func_id, false);
for (perf_thread &t : mReplayThreads)
{
t.eventCtx.collect_scope(now, func_id, false);
}
}
for (perf_thread& t : mBgThreads)
if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS)
{
t.eventCtx.collect_scope(now, func_id, false);
for (perf_thread &t : mBgThreads)
{
t.eventCtx.collect_scope(now, func_id, false);
}
}
for (perf_thread& t : mMultiPMUThreads)
if (flags & COLLECT_MULTI_PMU_THREADS || flags & COLLECT_ALL_THREADS)
{
t.eventCtx.collect_scope(now, func_id, false);
for (perf_thread &t : mMultiPMUThreads)
{
t.eventCtx.collect_scope(now, func_id, false);
}
}
for (perf_thread& t : mBookerThread)
if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS)
{
t.eventCtx.collect_scope(now, func_id, false);
for (perf_thread &t : mBookerThread)
{
t.eventCtx.collect_scope(now, func_id, false);
}
}
for (perf_thread& t : mCSPMUThreads)
if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS)
{
t.eventCtx.collect_scope(now, func_id, false);
for (perf_thread &t : mCSPMUThreads)
{
t.eventCtx.collect_scope(now, func_id, false);
}
}
last_collect_scope_flags = flags;
return true;
}

bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id) {
bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) {
if (!mCollecting) return false;
if (last_collect_scope_flags != flags) {
DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
return false;
}
struct snapshot snap_start, snap_stop;
for (perf_thread &t : mReplayThreads) {
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
for (perf_thread &t : mBgThreads) {
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
for (perf_thread &t : mMultiPMUThreads) {
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
for (perf_thread &t : mBookerThread) {
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
for (perf_thread &t : mCSPMUThreads) {
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mReplayThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mBgThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_MULTI_PMU_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mMultiPMUThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mBookerThread)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mCSPMUThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
return false;
}
Expand Down Expand Up @@ -707,7 +747,23 @@ struct snapshot event_context::collect(int64_t now)
return snap;
}

struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping) {
struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping)
{

#if defined(__aarch64__)
// stop counters for arm64
uint64_t PMCNTENSET_EL0_safe;
uint64_t PMCR_EL0_safe;
asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE));
#elif defined(__arm__)
// stop counters for arm32
uint64_t PMCNTENSET_EL0_safe;
uint64_t PMCR_EL0_safe;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE));
#endif

if (stopping && last_snap_func_id != func_id) {
DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
}
Expand All @@ -719,6 +775,17 @@ struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool
last_snap_func_id = func_id;
last_snap = snap;
}

#if defined(__aarch64__)
// start counters for arm64
asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe));
#elif defined(__arm__)
// start counters for arm32
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe));
#endif

return snap;
}

Expand Down
16 changes: 14 additions & 2 deletions collectors/perf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ enum cmn_node_type
CMN_TYPE_WP = 0x7770,
};

/// Bit flags naming the perf thread groups a collect_scope_start/stop call should sample.
/// Values may be combined with bitwise OR.
/// COLLECT_ALL_THREADS is its own bit (bit 0), not the union of the per-group bits, so code
/// consuming these flags has to test for it explicitly.
enum collect_scope_flags : int32_t
{
    COLLECT_NOOP              = 0,      ///< No bit set.
    COLLECT_ALL_THREADS       = 1 << 0, ///< Catch-all bit.
    COLLECT_REPLAY_THREADS    = 1 << 1,
    COLLECT_BG_THREADS        = 1 << 2,
    COLLECT_MULTI_PMU_THREADS = 1 << 3,
    COLLECT_BOOKER_THREADS    = 1 << 4,
    COLLECT_CSPMU_THREADS     = 1 << 5,
};

struct snapshot {
snapshot() : size(0) {}

Expand Down Expand Up @@ -147,8 +158,8 @@ class PerfCollector : public Collector
virtual void summarize() override;

/// Collector functions for perapi perf instrumentations.
virtual bool collect_scope_start(int64_t now, uint16_t func_id);
virtual bool collect_scope_stop(int64_t now, uint16_t func_id);
virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags);
virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags);

private:
void create_perf_thread();
Expand All @@ -163,6 +174,7 @@ class PerfCollector : public Collector
std::map<int, std::vector<struct event>> mMultiPMUEvents;
std::map<int, std::vector<struct event>> mCSPMUEvents;
std::map<std::string, std::vector<struct timespec>> mClocks; // device_name -> clock_vector
int last_collect_scope_flags = 0;

struct perf_thread
{
Expand Down
8 changes: 4 additions & 4 deletions interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,20 +450,20 @@ void Collection::collect(std::vector<int64_t> custom)
}
}

void Collection::collect_scope_start(uint16_t label) {
void Collection::collect_scope_start(uint16_t label, int32_t flags) {
const int64_t now = getTime();
mScopeStartTime = now;
for (Collector* c : mRunning)
{
if (!c->isThreaded())
{
c->collect_scope_start(now, label);
c->collect_scope_start(now, label, flags);
}
}
mScopeStarted = true;
}

void Collection::collect_scope_stop(uint16_t label) {
void Collection::collect_scope_stop(uint16_t label, int32_t flags) {
// A collect_scope_start and collect_scope_end pair is considered as one sample.
if (!mScopeStarted) {
DBG_LOG("WARNING: collect_scope_stop called without a corresponding collect_scope_start.\n");
Expand All @@ -476,7 +476,7 @@ void Collection::collect_scope_stop(uint16_t label) {
{
if (!c->isThreaded())
{
c->collect_scope_stop(now, label);
c->collect_scope_stop(now, label, flags);
}
}
mScopeStarted = false;
Expand Down
8 changes: 4 additions & 4 deletions interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ class Collector
virtual bool stop() { mCollecting = false; return true; }
virtual bool postprocess(const std::vector<int64_t>& timing);
virtual bool collect( int64_t ) = 0;
/// Default per-API scope hooks: do nothing and report success.
/// `flags` is spelled int32_t so the signature matches the PerfCollector declarations exactly;
/// with a differing spelling the derived functions would silently stop overriding on any
/// platform where int32_t is not `int`.
virtual bool collect_scope_start( int64_t now, uint16_t func_id, int32_t flags ) { return true; }
virtual bool collect_scope_stop( int64_t now, uint16_t func_id, int32_t flags ) { return true; }
virtual bool collecting() const { return mCollecting; }
virtual const std::string& name() const { return mName; }
virtual bool available() = 0;
Expand Down Expand Up @@ -256,11 +256,11 @@ class Collection

/// Sample periodical data for per API instrumentation. Call this method before the payload
/// execution. Currently only used for perf collector.
void collect_scope_start(uint16_t label);
void collect_scope_start(uint16_t label, int32_t flags);

/// Sample periodical data for per API instrumentation. Call this method after the payload
/// execution. Currently only used for perf collector.
void collect_scope_stop(uint16_t label);
void collect_scope_stop(uint16_t label, int32_t flags);

/// Get the results as JSON
Json::Value results();
Expand Down
27 changes: 19 additions & 8 deletions test.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#include "interface.hpp"
#include "collectors/perf.hpp"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <mutex>
#include <memory>
#include <condition_variable>

#include "json/writer.h"
Expand Down Expand Up @@ -267,6 +269,10 @@ class Test8 {

Test8() : test8_ready(false) {}

// Deletes the Collection referenced by the raw member pointer `c`.
// NOTE(review): assumes `c` was allocated with `new` and is owned solely by this object
// — confirm at the point where `c` is assigned.
~Test8() {
delete c;
}

void run() {
printf("[test 8]: Testing collect_scope for the perf collector...\n");
std::vector<std::thread> threads;
Expand Down Expand Up @@ -342,14 +348,18 @@ class Test8 {
tmp *= rand();
};

c->collect_scope_start(0 + scope_label_offset);
payload(10);
c->collect_scope_stop(0 + scope_label_offset);
c->collect_scope_start(5 + scope_label_offset);
payload(1000);
c->collect_scope_stop(5 + scope_label_offset);
if (strncmp(thread_name.c_str(), "patrace", 7) == 0) {
c->collect_scope_start(0 + scope_label_offset, COLLECT_REPLAY_THREADS);
payload(1000);
c->collect_scope_stop(0 + scope_label_offset, COLLECT_REPLAY_THREADS);
}

if (strncmp(thread_name.c_str(), "mali", 4) == 0) {
c->collect_scope_start(1 + scope_label_offset, COLLECT_BG_THREADS);
payload(1000);
c->collect_scope_stop(1 + scope_label_offset, COLLECT_BG_THREADS);
}
printf("Thread %s finished.\n", thread_name.c_str());
// usleep(1e5);
}

Collection *c;
Expand All @@ -369,7 +379,8 @@ int main()
test5();
test6();
test7(); // summarized results
(new Test8())->run();
auto test8 = std::unique_ptr<Test8>(new Test8());
test8->run();
printf("ALL DONE!\n");
return 0;
}

0 comments on commit 60b3aa7

Please sign in to comment.