Skip to content

Commit

Permalink
Merge pull request #18 from billchen2k/billc/collect-scope
Browse files Browse the repository at this point in the history
Add collect_scope_start/stop to measure perf counters for a code fragment
  • Loading branch information
per-mathisen-arm authored Aug 1, 2024
2 parents d629076 + 60b3aa7 commit ac59e6c
Show file tree
Hide file tree
Showing 5 changed files with 391 additions and 7 deletions.
173 changes: 168 additions & 5 deletions collectors/perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name)
struct event leader = {"CPUCycleCount", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES};

mSet = mConfig.get("set", -1).asInt();
mInherit = mConfig.get("inherit", 1).asInt();
mInherit = mConfig.get("inherit", 1).asInt();

leader.inherited = mInherit;
mEvents.push_back(leader);

Expand Down Expand Up @@ -145,7 +145,7 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name)
}
}
else if(e.device!="")
{//for d9000, CPU cores on different PMU
{//for d9000, CPU cores on different PMU
e.config = item.get("config", 0).asUInt64();
auto type_string = e.device;

Expand Down Expand Up @@ -405,7 +405,6 @@ bool PerfCollector::collect(int64_t now)
{
if (!mCollecting)
return false;

struct snapshot snap;
for (perf_thread& t : mReplayThreads)
{
Expand Down Expand Up @@ -444,6 +443,103 @@ bool PerfCollector::collect(int64_t now)
return true;
}

// Take a "scope start" perf snapshot for every thread group selected by
// `flags` (bitmask of collect_scope_flags). Each selected event_context
// caches its snapshot internally so the matching collect_scope_stop() call
// can compute per-scope counter deltas.
// Returns false when the collector is not currently running.
bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) {
    if (!mCollecting) return false;
    if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS)
    {
        for (perf_thread &t : mReplayThreads)
        {
            t.eventCtx.collect_scope(now, func_id, false);
        }
    }
    if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS)
    {
        for (perf_thread &t : mBgThreads)
        {
            t.eventCtx.collect_scope(now, func_id, false);
        }
    }
    if (flags & COLLECT_MULTI_PMU_THREADS || flags & COLLECT_ALL_THREADS)
    {
        for (perf_thread &t : mMultiPMUThreads)
        {
            t.eventCtx.collect_scope(now, func_id, false);
        }
    }
    if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS)
    {
        for (perf_thread &t : mBookerThread)
        {
            t.eventCtx.collect_scope(now, func_id, false);
        }
    }
    if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS)
    {
        for (perf_thread &t : mCSPMUThreads)
        {
            t.eventCtx.collect_scope(now, func_id, false);
        }
    }
    // Remember the selection so collect_scope_stop() can verify that start
    // and stop calls are correctly paired.
    last_collect_scope_flags = flags;
    return true;
}

bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) {
if (!mCollecting) return false;
if (last_collect_scope_flags != flags) {
DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
return false;
}
struct snapshot snap_start, snap_stop;
if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mReplayThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mBgThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_MULTI_PMU_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mMultiPMUThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mBookerThread)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS)
{
for (perf_thread &t : mCSPMUThreads)
{
snap_start = t.eventCtx.last_snap;
snap_stop = t.eventCtx.collect_scope(now, func_id, true);
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
return false;
}

bool PerfCollector::postprocess(const std::vector<int64_t>& timing)
{
Json::Value v;
Expand Down Expand Up @@ -521,7 +617,7 @@ bool PerfCollector::postprocess(const std::vector<int64_t>& timing)
}
mCustomResult["thread_data"].append(perf_threadValue);
}

mCustomResult["thread_data"].append(bgValue);
mCustomResult["thread_data"].append(allValue);
}
Expand Down Expand Up @@ -614,9 +710,34 @@ bool event_context::stop()
return false;
}

for (struct counter& c : mCounters)
{
if (c.scope_values.size() > 0 && mValueResults != nullptr)
{
std::string name = c.name + ":ScopeSum";
for (unsigned int i = 0; i < c.scope_values.size(); i++)
{
(*mValueResults)[name].push_back(c.scope_values[i]);
}
}
}

std::string name_num_func_calls = "CCthread:ScopeNumCalls";
for (unsigned int i = 0; i < scope_num_calls.size(); i++)
{
(*mValueResults)[name_num_func_calls].push_back(scope_num_calls[i]);
}

std::string name_num_calls = "CCthread:ScopeNumWithPerf";
for (unsigned int i = 0; i < scope_num_with_perf.size(); i++)
{
(*mValueResults)[name_num_calls].push_back(scope_num_with_perf[i]);
}

return true;
}

// Collect and reset the perf counters to 0.
struct snapshot event_context::collect(int64_t now)
{
struct snapshot snap;
Expand All @@ -626,6 +747,48 @@ struct snapshot event_context::collect(int64_t now)
return snap;
}

struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping)
{

#if defined(__aarch64__)
// stop counters for arm64
uint64_t PMCNTENSET_EL0_safe;
uint64_t PMCR_EL0_safe;
asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE));
#elif defined(__arm__)
// stop counters for arm32
uint64_t PMCNTENSET_EL0_safe;
uint64_t PMCR_EL0_safe;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE));
#endif

if (stopping && last_snap_func_id != func_id) {
DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
}
struct snapshot snap;
if (read(group, &snap, sizeof(snap)) == -1) perror("read");
if (stopping) {
last_snap_func_id = -1;
} else {
last_snap_func_id = func_id;
last_snap = snap;
}

#if defined(__aarch64__)
// start counters for arm64
asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe));
#elif defined(__arm__)
// start counters for arm32
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe));
#endif

return snap;
}

static std::string getThreadName(int tid)
{
std::stringstream comm_path;
Expand Down
68 changes: 67 additions & 1 deletion collectors/perf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
#include "collector_utility.hpp"
#include "interface.hpp"
#include <map>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>

enum hw_cnt_length
{
Expand Down Expand Up @@ -33,6 +36,17 @@ enum cmn_node_type
CMN_TYPE_WP = 0x7770,
};

// Bitmask selecting which thread groups collect_scope_start/stop sample.
// Flags may be OR-ed together; COLLECT_ALL_THREADS selects every group.
enum collect_scope_flags: int32_t
{
    COLLECT_NOOP              = 0,      // no thread group selected
    COLLECT_ALL_THREADS       = 1 << 0, // all groups below
    COLLECT_REPLAY_THREADS    = 1 << 1,
    COLLECT_BG_THREADS        = 1 << 2,
    COLLECT_MULTI_PMU_THREADS = 1 << 3,
    COLLECT_BOOKER_THREADS    = 1 << 4,
    COLLECT_CSPMU_THREADS     = 1 << 5,
};

struct snapshot {
snapshot() : size(0) {}

Expand All @@ -59,13 +73,21 @@ class event_context
// Construct an idle context: no perf event group opened yet and no
// collect_scope_start() snapshot in flight.
event_context()
{
// -1 marks "no event-group fd allocated yet"; set by init().
group = -1;
// last_snap_func_id is uint16_t: -1 wraps to 65535, used as the
// "no scope collection in progress" sentinel.
last_snap_func_id = -1;
}

~event_context() {}

bool init(std::vector<struct event> &events, int tid, int cpu);
bool start();
struct snapshot collect(int64_t now);

struct snapshot collect_scope(int64_t now, uint16_t func_id, bool stopping);

// If not -1, then we are in the middle of collect_scope_start/stop.
uint16_t last_snap_func_id;
struct snapshot last_snap;

bool stop();
bool deinit();

Expand All @@ -75,15 +97,48 @@ class event_context
result[mCounters[i].name].push_back(snap.values[i]);
}

// Accumulate the per-counter delta between a scope's start and end
// snapshots into the per-func_id totals.
//
// The scope vectors are grown on demand (doubling, capped at the full
// uint16_t index range) so that index func_id is always valid.
// is_calling is true only for the thread that drove the collection, so
// scope_num_calls counts each scope invocation exactly once, while
// scope_num_with_perf counts every thread that saw a non-zero delta.
// NOTE(review): assumes snap_start/snap_end carry at least mCounters.size()
// values — confirm against event_context::collect_scope().
inline void update_data_scope(uint16_t func_id, bool is_calling, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result)
{
    if (!mValueResults) mValueResults = &result;
    // Upper bound for the on-demand resize: enough slots for any uint16_t
    // id. The previous bound of UINT16_MAX - 1 left the vectors too small
    // for func_id >= 65534, causing an out-of-bounds write.
    const size_t max_slots = static_cast<size_t>(UINT16_MAX) + 1;
    long long diff_acc = 0;
    for (unsigned int i = 0; i < mCounters.size(); i++) {
        long long diff = snap_end.values[i] - snap_start.values[i];
        if (mCounters[i].scope_values.size() <= func_id) {
            mCounters[i].scope_values.resize(std::min(static_cast<size_t>(func_id) * 2 + 1, max_slots), 0);
        }
        mCounters[i].scope_values[func_id] += diff;
        diff_acc += diff;
    }
    if (diff_acc > 0 && is_calling) {
        if (scope_num_calls.size() <= func_id) {
            scope_num_calls.resize(std::min(static_cast<size_t>(func_id) * 2 + 1, max_slots), 0);
        }
        scope_num_calls[func_id]++;
    }
    if (diff_acc > 0) {
        if (scope_num_with_perf.size() <= func_id) {
            scope_num_with_perf.resize(std::min(static_cast<size_t>(func_id) * 2 + 1, max_slots), 0);
        }
        scope_num_with_perf[func_id]++;
    }
}

private:
struct counter
{
std::string name;
int fd;
// Record accumulated values for update_data_scope, where the index of the vector is the uint16_t func_id.
std::vector<long long> scope_values;
};

int group;
std::vector<struct counter> mCounters;
// Record number of scope calls with perf counter incremental greater than 0 (can happen in multiple bg threads)
std::vector<int32_t> scope_num_with_perf;
// Record number of scope calls that actually triggered the collect_scope (happen in 1 thread that calls the collection method)
std::vector<int32_t> scope_num_calls;
CollectorValueResults *mValueResults = nullptr;
};

class PerfCollector : public Collector
Expand All @@ -102,7 +157,11 @@ class PerfCollector : public Collector
virtual bool postprocess(const std::vector<int64_t>& timing) override;
virtual void summarize() override;

private:
/// Collector functions for perapi perf instrumentations.
virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags);
virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags);

private:
void create_perf_thread();
void saveResultsFile();

Expand All @@ -115,6 +174,7 @@ class PerfCollector : public Collector
std::map<int, std::vector<struct event>> mMultiPMUEvents;
std::map<int, std::vector<struct event>> mCSPMUEvents;
std::map<std::string, std::vector<struct timespec>> mClocks; // device_name -> clock_vector
int last_collect_scope_flags = 0;

struct perf_thread
{
Expand All @@ -125,6 +185,12 @@ class PerfCollector : public Collector
eventCtx.update_data(snap, mResultsPerThread);
}

// Fold a start/stop snapshot pair into this thread's per-func_id results.
// is_calling (cur_tid == tid) is true only when the current thread is the
// one this perf_thread monitors, so per-call bookkeeping in
// event_context::update_data_scope is counted exactly once even though
// every monitored thread gets sampled.
void update_data_scope(uint16_t func_id, struct snapshot& snap_start, struct snapshot& snap_end)
{
pid_t cur_tid = syscall(SYS_gettid);
eventCtx.update_data_scope(func_id, cur_tid == tid, snap_start, snap_end, mResultsPerThread);
}

void clear()
{
for (auto& pair : mResultsPerThread)
Expand Down
32 changes: 32 additions & 0 deletions interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,38 @@ void Collection::collect(std::vector<int64_t> custom)
}
}

void Collection::collect_scope_start(uint16_t label, int32_t flags) {
const int64_t now = getTime();
mScopeStartTime = now;
for (Collector* c : mRunning)
{
if (!c->isThreaded())
{
c->collect_scope_start(now, label, flags);
}
}
mScopeStarted = true;
}

void Collection::collect_scope_stop(uint16_t label, int32_t flags) {
// A collect_scope_start and collect_scope_end pair is considered as one sample.
if (!mScopeStarted) {
DBG_LOG("WARNING: collect_scope_stop called without a corresponding collect_scope_start.\n");
return;
}
const int64_t now = getTime();
// Timing is ignored to avoid extreme large json outputs.
// mTiming.push_back(now - mScopeStartTime);
for (Collector* c : mRunning)
{
if (!c->isThreaded())
{
c->collect_scope_stop(now, label, flags);
}
}
mScopeStarted = false;
}

Json::Value Collection::results()
{
Json::Value results;
Expand Down
Loading

0 comments on commit ac59e6c

Please sign in to comment.