Skip to content

Commit

Permalink
add config for snippets perf count
Browse files Browse the repository at this point in the history
  • Loading branch information
chenhu-wang committed Oct 9, 2023
1 parent 4f3cdba commit 33facce
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 19 deletions.
7 changes: 0 additions & 7 deletions src/common/snippets/include/snippets/op/perf_count.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,6 @@ class PerfCountBegin : public PerfCountBeginBase {
void set_start_time();
std::chrono::high_resolution_clock::time_point& get_start_time();

// ~PerfCountBegin() {
// auto start_time_stamp_c = start_time_stamp.time_since_epoch().count();
// std::cout << "start_time_stamp_c:" << start_time_stamp_c << "ns" << std::endl;
// }

private:
std::chrono::high_resolution_clock::time_point start_time_stamp = {};
};
Expand All @@ -89,8 +84,6 @@ class PerfCountEnd : public PerfCountEndBase {
private:
uint64_t accumulation = 0ul;
uint32_t iteration = 0u;
// pc_begin as member for perf? no get for each get perf start?
// std::shared_ptr<PerfCountBegin> m_pc_begin = nullptr;
};

} // namespace op
Expand Down
6 changes: 3 additions & 3 deletions src/common/snippets/src/lowered/pass/insert_loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ void InsertLoops::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPtr&
init_params(loop_exits);

// begin
const auto& perf_count_begin = std::make_shared<op::PerfCountBegin>();
const auto& perf_count_begin_expr = linear_ir.create_expression(perf_count_begin, std::vector<PortConnectorPtr>{});
linear_ir.insert(loop_begin_pos, perf_count_begin_expr);
// const auto& perf_count_begin = std::make_shared<op::PerfCountBegin>();
// const auto& perf_count_begin_expr = linear_ir.create_expression(perf_count_begin, std::vector<PortConnectorPtr>{});
// linear_ir.insert(loop_begin_pos, perf_count_begin_expr);
// begin

const auto& loop_begin = std::make_shared<op::LoopBegin>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,20 @@ INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(ENABLE);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(IGNORE_CALLBACK);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLE);

/**
 * @brief Defines the Snippets performance count mode.
 * @param DISABLED - default, no performance counting for snippets.
 * @param CHRONO - performance counting based on chrono calls. This is a universal method that works on both ARM and x86.
 * @param RDTSC - performance counting that reads the time-stamp counter via rdtsc. This has lower overhead than CHRONO:
 * for example, tests show that RDTSC takes ~50ns while CHRONO takes 260ns for one pair of perf count start and
 * perf count end executions. RDTSC is only applicable on x86.
 * @ingroup ie_dev_api_plugin_api
 */
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_KEY(SNIPPETS_PERF_COUNT_MODE);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(DISABLED);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(CHRONO);
INFERENCE_ENGINE_1_0_DEPRECATED DECLARE_CONFIG_VALUE(RDTSC);

} // namespace PluginConfigInternalParams

} // namespace InferenceEngine
10 changes: 10 additions & 0 deletions src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,16 @@ void Config::readProperties(const std::map<std::string, std::string> &prop, cons
else
IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_SNIPPETS_MODE
<< ". Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
} else if (key == PluginConfigInternalParams::KEY_SNIPPETS_PERF_COUNT_MODE) {
// Translate the textual property value into the snippets perf-count mode.
// Only the three declared values (DISABLED, CHRONO, RDTSC) are accepted; anything else is rejected.
if (val == PluginConfigInternalParams::DISABLED)
    snippetsPCMode = SnippetsPerfCountMode::Disabled;
else if (val == PluginConfigInternalParams::CHRONO)
    snippetsPCMode = SnippetsPerfCountMode::Chrono;
else if (val == PluginConfigInternalParams::RDTSC)
    snippetsPCMode = SnippetsPerfCountMode::Rdtsc;
else
    // The message lists the values exactly as they are compared above (DISABLED, not DISABLE).
    IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_SNIPPETS_PERF_COUNT_MODE
               << ". Expected values: DISABLED/CHRONO/RDTSC";
} else if (key == ov::hint::execution_mode.name()) {
if (val == "PERFORMANCE") {
executionMode = ov::hint::ExecutionMode::PERFORMANCE;
Expand Down
7 changes: 7 additions & 0 deletions src/plugins/intel_cpu/src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ struct Config {
Disable,
};

// Selects how performance counting is done for Snippets.
// Enumerator values are spelled out explicitly; they match the implicit numbering.
enum SnippetsPerfCountMode {
    Disabled = 0,  // no performance counting
    Chrono = 1,    // chrono-based performance counting
    Rdtsc = 2,     // rdtsc-based performance counting
};

enum class LatencyThreadingMode {
PER_NUMA_NODE,
PER_SOCKET,
Expand All @@ -53,6 +59,7 @@ struct Config {
bool collectPerfCounters = false;
bool exclusiveAsyncRequests = false;
SnippetsMode snippetsMode = SnippetsMode::Enable;
SnippetsPerfCountMode snippetsPCMode = SnippetsPerfCountMode::Disabled;
std::string dumpToDot = {};
std::string device_id = {};
float fcSparseWeiDecompressionRate = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@
#include <cpu/x64/jit_generator.hpp>
#include "transformations/snippets/x64/op/perf_count_rdtsc.hpp"

// if CHRONO_CALL is defined, use std::chrono::high_resolution_clock as timer
// otherwise uncomment below line to read tsc as cycle counters
#define CHRONO_CALL

namespace ov {
namespace intel_cpu {

Expand All @@ -33,7 +29,6 @@ class jit_perf_count_rdtsc_end_emitter : public jit_emitter {
size_t get_inputs_num() const override;

private:
// use start in emit_impl
void emit_impl(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs) const override;
mutable uint64_t* m_start_count = nullptr;
mutable uint64_t* m_accumulation = nullptr;
Expand Down
16 changes: 16 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,22 @@ static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::str
IE_THROW() << "Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK";
}

/**
 * @brief Resolves the Snippets performance count mode requested in a model configuration.
 * @param modelConfig key/value properties supplied for the model; looked up by KEY_SNIPPETS_PERF_COUNT_MODE.
 * @param engineConfig engine-level configuration; not consulted by this function.
 * @return Disabled when the key is absent, otherwise the mode matching the stored value.
 * @throws via IE_THROW when the stored value is none of DISABLED/CHRONO/RDTSC.
 */
static Config::SnippetsPerfCountMode getSnippetsPerfCountMode(const std::map<std::string, std::string>& modelConfig, const Config& engineConfig) {
    const auto entry = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_PERF_COUNT_MODE);
    // Key not set explicitly: performance counting is disabled by default.
    if (entry == modelConfig.end()) {
        return Config::SnippetsPerfCountMode::Disabled;
    }

    const auto& requested = entry->second;
    if (requested == PluginConfigInternalParams::CHRONO) {
        return Config::SnippetsPerfCountMode::Chrono;
    }
    if (requested == PluginConfigInternalParams::RDTSC) {
        return Config::SnippetsPerfCountMode::Rdtsc;
    }
    if (requested == PluginConfigInternalParams::DISABLED) {
        return Config::SnippetsPerfCountMode::Disabled;
    }

    // Any other value is a configuration error.
    IE_THROW() << "Wrong value for property key SNIPPETS_PERF_COUNT_MODE. Expected values: DISABLED/CHRONO/RDTSC";
}

InferenceEngine::IExecutableNetworkInternal::Ptr
Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::LoadExeNetworkImpl");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,10 @@ class PerfCountRdtscEnd : public PerfCountEndBase {
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;

std::shared_ptr<PerfCountRdtscBegin> get_pc_begin();
// in each call, PerfCountRdtscBegin get start_time_stamp.
// in each call, PerfCountRdtscEnd get end_time_stamp, then total_duration += end_time_stamp - start_time_stamp, and iteration++.
// in each call, PerfCountRdtscBegin get start_count.
// in each call, PerfCountRdtscEnd get end_count, then total_duration += end_count - start_count, and iteration++.
// in destructor of PerfCountRdtscEnd, output the perf info
// PerfCountRdtscBegin& perf_count_start;
// accumulation is time in nanosecond for chrono call, cycle count for rdtsc
// accumulation is cycle count
uint64_t accumulation = 0ul;
uint32_t iteration = 0u;
};
Expand Down

0 comments on commit 33facce

Please sign in to comment.