Skip to content

Commit

Permalink
Fix positions for pausing/resuming perf counters.
Browse files Browse the repository at this point in the history
Signed-off-by: Bill Chen <[email protected]>
  • Loading branch information
jtchen2k committed Aug 14, 2024
1 parent ac59e6c commit 05f862a
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 38 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set(SRC_ROOT ".")
option(VULKAN_LAYER "Build Vulkan layer" OFF)
option(BUILD_STATIC "Build static version" ON)

add_definitions(-std=c++11 -Wall -fno-strict-aliasing -ggdb -Wno-deprecated-declarations)
add_definitions(-std=c++11 -Wall -Werror -fno-strict-aliasing -ggdb -Wno-deprecated-declarations)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")
set(COLLECTOR_INCLUDES
Expand Down
66 changes: 40 additions & 26 deletions collectors/perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,44 @@ bool PerfCollector::collect(int64_t now)
return true;
}

bool PerfCollector::perf_counter_pause() {
#if defined(__aarch64__)
asm volatile("mrs %0, PMCNTENSET_EL0" : "=r" (PMCNTENSET_EL0_safe));
// stop counters for arm64
asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE));
#elif defined(__arm__)
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(PMCNTENSET_EL0_safe));
// stop counters for arm32
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE));
#endif
return true;
}

bool PerfCollector::perf_counter_resume() {
#if defined(__aarch64__)
// start counters for arm64
asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe));
#elif defined(__arm__)
// start counters for arm32
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe));
#endif
return true;
}


bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) {
#if defined(__x86_64__)
if (!attempt_collect_scope_x64) {
attempt_collect_scope_x64 = true;
DBG_LOG("WARNING: Frequent invocation of collect_scope on x64 devices may introduce "
"significant overhead to the kernel perf counter data.\n");
}
#endif
if (!perf_counter_pause()) return false;
if (!mCollecting) return false;
struct snapshot snap;
if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS)
Expand Down Expand Up @@ -482,10 +519,12 @@ bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t f
}
}
last_collect_scope_flags = flags;
if (!perf_counter_resume()) return false;
return true;
}

bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) {
if (!perf_counter_pause()) return false;
if (!mCollecting) return false;
if (last_collect_scope_flags != flags) {
DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
Expand Down Expand Up @@ -537,6 +576,7 @@ bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t fl
t.update_data_scope(func_id, snap_start, snap_stop);
}
}
if (!perf_counter_resume()) return false;
return false;
}

Expand Down Expand Up @@ -749,21 +789,6 @@ struct snapshot event_context::collect(int64_t now)

struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping)
{

#if defined(__aarch64__)
// stop counters for arm64
uint64_t PMCNTENSET_EL0_safe;
uint64_t PMCR_EL0_safe;
asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE));
#elif defined(__arm__)
// stop counters for arm32
uint64_t PMCNTENSET_EL0_safe;
uint64_t PMCR_EL0_safe;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE));
#endif

if (stopping && last_snap_func_id != func_id) {
DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id);
}
Expand All @@ -775,17 +800,6 @@ struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool
last_snap_func_id = func_id;
last_snap = snap;
}

#if defined(__aarch64__)
// start counters for arm64
asm volatile("msr PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe));
asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe));
#elif defined(__arm__)
// start counters for arm32
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe));
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe));
#endif

return snap;
}

Expand Down
11 changes: 11 additions & 0 deletions collectors/perf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ class event_context
int fd;
// Record accumulated values for update_data_scope, where the index of the vector is the uint16_t func_id.
std::vector<long long> scope_values;

counter() {
scope_values.reserve(512);
scope_values.resize(512, 0);
}
};

int group;
Expand Down Expand Up @@ -160,6 +165,8 @@ class PerfCollector : public Collector
/// Collector functions for perapi perf instrumentations.
virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags);
virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags);
bool perf_counter_pause();
bool perf_counter_resume();

private:
void create_perf_thread();
Expand All @@ -176,6 +183,10 @@ class PerfCollector : public Collector
std::map<std::string, std::vector<struct timespec>> mClocks; // device_name -> clock_vector
int last_collect_scope_flags = 0;

bool attempt_collect_scope_x64 = false;
uint64_t PMCNTENSET_EL0_safe = 0;
uint64_t PMCR_EL0_safe = 0;

struct perf_thread
{
perf_thread(const int tid, const std::string &name): tid(tid), name(name), eventCtx{} {}
Expand Down
17 changes: 6 additions & 11 deletions interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,26 +451,22 @@ void Collection::collect(std::vector<int64_t> custom)
}

void Collection::collect_scope_start(uint16_t label, int32_t flags) {
const int64_t now = getTime();
mScopeStartTime = now;
// Not getting the current time as it introduces huge kernel cycle overhead to the perf collector.
const int64_t now = 0;
// mScopeStartTime = now;
for (Collector* c : mRunning)
{
if (!c->isThreaded())
{
c->collect_scope_start(now, label, flags);
}
}
mScopeStarted = true;
}

void Collection::collect_scope_stop(uint16_t label, int32_t flags) {
// A collect_scope_start and collect_scope_end pair is considered as one sample.
if (!mScopeStarted) {
DBG_LOG("WARNING: collect_scope_stop called without a corresponding collect_scope_start.\n");
return;
}
const int64_t now = getTime();
// Timing is ignored to avoid extreme large json outputs.
// Not getting the current time as it introduces huge kernel cycle overhead to the perf collector.
const int64_t now = 0;
// Timing is not enabled to avoid extreme large json outputs.
// mTiming.push_back(now - mScopeStartTime);
for (Collector* c : mRunning)
{
Expand All @@ -479,7 +475,6 @@ void Collection::collect_scope_stop(uint16_t label, int32_t flags) {
c->collect_scope_stop(now, label, flags);
}
}
mScopeStarted = false;
}

Json::Value Collection::results()
Expand Down

0 comments on commit 05f862a

Please sign in to comment.