Skip to content

Commit

Permalink
profile
Browse files Browse the repository at this point in the history
  • Loading branch information
chenhu-wang committed Aug 1, 2024
1 parent 847a614 commit 8817f25
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 1 deletion.
10 changes: 9 additions & 1 deletion src/plugins/intel_cpu/src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <utility>
#include <vector>

#include "utils/my_profiler.hpp"

#include "edge.h"
#include "graph_dumper.h"
#include "graph_optimizer.h"
Expand Down Expand Up @@ -1124,12 +1126,17 @@ void Graph::PullOutputData(std::unordered_map<std::size_t, ov::SoPtr<ITensor>>&
}

void Graph::InferStatic(SyncInferRequest* request) {
auto _prof0 = MY_PROFILE("::InferStatic#" + std::to_string(infer_count));
for (const auto& node : m_executableGraphNodes) {
VERBOSE(node, getConfig().debugCaps.verbose);
PERF(node, getConfig().collectPerfCounters);

if (request)
request->throw_if_canceled();

auto _prof = MY_PROFILE_ARGS(node->getTypeStr(),
{{"Name", node->getName()}, {"Impl", node->getPrimitiveDescriptorType()}});

ExecuteNode(node, m_stream);
}
}
Expand Down Expand Up @@ -1458,7 +1465,8 @@ void Graph::Infer(SyncInferRequest* request) {
OPENVINO_ASSERT(IsReady(), "Wrong state of the ov::intel_cpu::Graph. Topology is not ready: ", static_cast<int>(status));
}

if (infer_count != -1) infer_count++;
// if (infer_count != -1) infer_count++;
infer_count++;
}

void Graph::SortTopologically() {
Expand Down
9 changes: 9 additions & 0 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,15 @@ Subgraph::Subgraph(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr
OPENVINO_ASSERT(tmp_snippet, "Attempt to create Subgraph node from an invalid op type");
subgraph_attrs->snippet = tmp_snippet->clone();
subgraph_attrs->bodyHash = getBodyHash(tmp_snippet);
//
ov::pass::Manager mgr;
auto body = tmp_snippet->body_ptr();
auto name = body->get_friendly_name();
std::string xml = name + ".xml";
std::string bin = name + ".bin";
mgr.register_pass<ov::pass::Serialize>(xml, bin);
mgr.run_passes(body);
//

#if defined(OPENVINO_ARCH_ARM64)
subgraph_attrs->snippet->set_generator(std::make_shared<aarch64::CPUGenerator>(host_isa));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,7 @@ void Transformations::MainSnippets(void) {

#if defined(OPENVINO_ARCH_X86_64)
auto is_supported_matmul = [this](const std::shared_ptr<const ov::Node>& n) {
// return true;
const auto matmul = ov::as_type_ptr<const ov::op::v0::MatMul>(n);
if (!matmul || matmul->is_dynamic())
return false;
Expand Down Expand Up @@ -1113,9 +1114,21 @@ void Transformations::Snippets(void) {
if (!useSnippets)
return;

ov::pass::Manager mgr0;
std::string xml0 = "before_tokenization.xml";
std::string bin0 = "before_tokenization.bin";
mgr0.register_pass<ov::pass::Serialize>(xml0, bin0);
mgr0.run_passes(model);

CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Snippets);
MainSnippets();
PostSnippets();

ov::pass::Manager mgr1;
std::string xml1 = "after_tokenization.xml";
std::string bin1 = "after_tokenization.bin";
mgr1.register_pass<ov::pass::Serialize>(xml1, bin1);
mgr1.run_passes(model);
}

} // namespace intel_cpu
Expand Down
141 changes: 141 additions & 0 deletions src/plugins/intel_cpu/src/utils/my_profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <atomic>
#include <iostream>
#include <mutex>
#include <sstream>
#include <thread>
#include <vector>

#include "my_profiler.hpp"

#ifdef WIN32
#include <intrin.h>
#pragma intrinsic(__rdtsc)
#else
#include <x86intrin.h>
#endif

struct dump_items {
std::string name; // The name of the event, as displayed in Trace Viewer
std::string cat; // The event categories
std::string ph = "X"; // The event type, 'B'/'E' OR 'X'
std::string pid = "0"; // The process ID for the process
std::string tid; // The thread ID for the thread that output this event.
uint64_t ts1; // The tracing clock timestamp of the event, [microsecond]
uint64_t ts2; // Duration = ts2 - ts1.
std::string tts; // Optional. The thread clock timestamp of the event
std::vector<std::pair<std::string, std::string>> vecArgs;
};

static inline std::string get_thread_id() {
std::stringstream ss;
ss << std::this_thread::get_id();
return ss.str();
}

static uint64_t rdtsc_calibrate(int seconds = 1) {
uint64_t start_ticks = __rdtsc();
std::this_thread::sleep_for(std::chrono::seconds(seconds));
return (__rdtsc() - start_ticks) / seconds;
}

class ProfilerManager {
protected:
std::vector<dump_items> _vecItems;
std::atomic<uint64_t> tsc_ticks_per_second{0};
std::atomic<uint64_t> tsc_ticks_base{0};
std::mutex _mutex;

public:
ProfilerManager() {
if (tsc_ticks_per_second == 0) {
uint64_t expected = 0;
auto tps = rdtsc_calibrate();
tsc_ticks_per_second.compare_exchange_strong(expected, tps);
std::cout << "=== ProfilerManager: tsc_ticks_per_second = " << tsc_ticks_per_second << std::endl;
tsc_ticks_base.compare_exchange_strong(expected, __rdtsc());
std::cout << "=== ProfilerManager: tsc_ticks_base = " << tsc_ticks_base << std::endl;
}
}

ProfilerManager(ProfilerManager& other) = delete;
void operator=(const ProfilerManager&) = delete;
~ProfilerManager() {
// Save tracing log to json file.
printf("ProfilerManager unconstruct...............\n");
save_to_json();
}

void add(const dump_items& val) {
std::lock_guard<std::mutex> lk(_mutex);
_vecItems.emplace_back(val);
}

private:
std::string tsc_to_nsec(uint64_t tsc_ticks) {
double val = (tsc_ticks - tsc_ticks_base) * 1000000.0 / tsc_ticks_per_second;
return std::to_string(val);
}

std::string tsc_to_nsec(uint64_t start, uint64_t end) {
double val = (end - start) * 1000000.0 / tsc_ticks_per_second;
return std::to_string(val);
}

void save_to_json() {
std::string out_fn = std::string("profile_cpu_") + std::to_string(tsc_ticks_base) + ".json";
FILE* pf = fopen(out_fn.c_str(), "wb");
if (nullptr == pf) {
printf("Can't fopen: %s\n", out_fn.c_str());
return;
}
printf("Save profile log: %s\n", out_fn.c_str());

// Headers
fprintf(pf, "{\n\"schemaVersion\": 1,\n\"traceEvents\":[\n");

for (size_t i = 0; i < _vecItems.size(); i++) {
auto& itm = _vecItems[i];
// Write 1 event
fprintf(pf, "{");
fprintf(pf, "\"name\":\"%s\",", itm.name.c_str());
fprintf(pf, "\"cat\":\"%s\",", itm.cat.c_str());
fprintf(pf, "\"ph\":\"%s\",", itm.ph.c_str());
fprintf(pf, "\"pid\":\"%s\",", itm.pid.c_str());
fprintf(pf, "\"tid\":\"%s\",", itm.tid.c_str());
fprintf(pf, "\"ts\":\"%s\",", tsc_to_nsec(itm.ts1).c_str());
fprintf(pf, "\"dur\":\"%s\",", tsc_to_nsec(itm.ts1, itm.ts2).c_str());
fprintf(pf, "\"args\":{");
for (size_t j = 0; j < itm.vecArgs.size(); j++) {
fprintf(pf,
"\"%s\":\"%s\"%s",
itm.vecArgs[j].first.c_str(),
itm.vecArgs[j].second.c_str(),
j + 1 == itm.vecArgs.size() ? "" : ",");
}
fprintf(pf, "}}%s\n", i == _vecItems.size() - 1 ? "" : ",");
}

fprintf(pf, "]\n}\n");
fclose(pf);
}
};
static ProfilerManager g_profileManage;
MyProfile::MyProfile(const std::string& name, const std::vector<std::pair<std::string, std::string>>& args) {
_name = name;
_args = args;
_ts1 = __rdtsc();
}

MyProfile::~MyProfile() {
dump_items itm;
itm.ts2 = __rdtsc();
itm.ts1 = _ts1;
itm.name = _name;
itm.tid = get_thread_id();
itm.cat = "PERF";
itm.vecArgs = _args;
g_profileManage.add(itm);
}
38 changes: 38 additions & 0 deletions src/plugins/intel_cpu/src/utils/my_profiler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <vector>

class MyProfile {
public:
MyProfile() = delete;
// MyProfile(MyProfile&)=delete;
MyProfile(const std::string& name,
const std::vector<std::pair<std::string, std::string>>& args =
std::vector<std::pair<std::string, std::string>>());
~MyProfile();

private:
std::string _name;
uint64_t _ts1;
std::vector<std::pair<std::string, std::string>> _args;
};

#define MY_PROFILE(NAME) MyProfile(NAME + std::string(":") + std::to_string(__LINE__))
#define MY_PROFILE_ARGS(NAME, ...) MyProfile(NAME + std::string(":") + std::to_string(__LINE__), __VA_ARGS__)

// Example:
// ==========================================
// auto p = MyProfile("fun_name")
// Or
// {
// auto p = MyProfile("fun_name")
// func()
// }
// Or
// {
// auto p2 = MyProfile("fun_name", {{"arg1", "sleep 30 ms"}});
// func()
// }

0 comments on commit 8817f25

Please sign in to comment.