Skip to content

Commit

Permalink
saved node instead of entire LIR for perf count node
Browse files Browse the repository at this point in the history
  • Loading branch information
chenhu-wang committed Nov 8, 2023
1 parent aa0cec4 commit 03752c5
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 6 deletions.
10 changes: 10 additions & 0 deletions src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class LoweringResult {
// Some emitters rely on other precompiled kernels.
// We need to keep the pointers to such emitters alive, so the kernels would still be accessible at runtime.
std::vector<std::shared_ptr<Emitter>> m_saved_emitters{};
// The kernel reads from and writes to perf count nodes at runtime, so these nodes must be kept alive during execution.
std::vector<std::shared_ptr<ov::Node>> m_saved_nodes{};

public:
std::shared_ptr<CompiledSnippet> compiled_snippet = nullptr;
Expand Down Expand Up @@ -126,6 +128,14 @@ class Generator {
* @return bool
*/
virtual bool uses_precompiled_kernel(const std::shared_ptr<Emitter>& emitter) const { return false; }
/**
 * @brief Tells whether a node has to be kept alive during execution.
 *        The default implementation accepts nodes that can be cast to
 *        op::PerfCountBeginBase or op::PerfCountEndBase.
 * @param op node to check
 * @return bool - true if the node has to be kept alive, false otherwise
 */
virtual bool should_node_alive_in_execution(const std::shared_ptr<ov::Node>& op) const {
    // Perf count "begin" nodes qualify; the "end" check below is skipped for them.
    if (std::dynamic_pointer_cast<op::PerfCountBeginBase>(op)) {
        return true;
    }
    // Otherwise the node qualifies only if it is a perf count "end" node.
    return std::dynamic_pointer_cast<op::PerfCountEndBase>(op) != nullptr;
}

std::shared_ptr<TargetMachine> target;
};
Expand Down
2 changes: 1 addition & 1 deletion src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ class Subgraph : public ov::op::util::SubGraphOp {
LoweringResult& lowering_result,
const lowered::pass::PassPipeline& backend_passes_pre_common,
const lowered::pass::PassPipeline& backend_passes_post_common) const;
void perf_count_transformations(lowered::LinearIR& linear_ir);
void perf_count_transformations(lowered::LinearIR& linear_ir) const;
void init_config();
// Count of Subgraph virtual ports:
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)
Expand Down
11 changes: 10 additions & 1 deletion src/common/snippets/src/generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ void Generator::generate(lowered::LinearIR& linear_ir, LoweringResult& result, c
result.m_saved_emitters.emplace_back(emitter);
}
}
// Perf count nodes must be kept alive during execution, so save them in the lowering result.
if (linear_ir.get_config().perf_count_mode != lowered::PerfCountMode::Disabled) {
for (const auto& expr : linear_ir) {
const auto& node = expr->get_node();
if (should_node_alive_in_execution(node)) {
result.m_saved_nodes.emplace_back(node);
}
}
}
result.compiled_snippet = target->get_snippet();
}

Expand All @@ -66,7 +75,7 @@ Generator::opRegType Generator::get_op_reg_type(const std::shared_ptr<Node>& op)
std::dynamic_pointer_cast<op::LoopEnd>(op) ||
std::dynamic_pointer_cast<op::Brgemm>(op) ||
std::dynamic_pointer_cast<op::Buffer>(op) ||
std::dynamic_pointer_cast<op::RankNormalization>(op)) ||
std::dynamic_pointer_cast<op::RankNormalization>(op) ||
std::dynamic_pointer_cast<op::PerfCountBeginBase>(op) ||
std::dynamic_pointer_cast<op::PerfCountEndBase>(op))
return gpr2gpr;
Expand Down
5 changes: 1 addition & 4 deletions src/common/snippets/src/op/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir,
lowering_result.buffer_scratchpad_size = buffer_allocation_pass->get_scratchpad_size();
}

void Subgraph::perf_count_transformations(lowered::LinearIR& linear_ir) {
void Subgraph::perf_count_transformations(lowered::LinearIR& linear_ir) const {
INTERNAL_OP_SCOPE(Subgraph);
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::perf_count_transformations")

Expand All @@ -485,8 +485,6 @@ snippets::Schedule Subgraph::generate(const BlockedShapeVector& blocked_input_sh
const lowered::pass::PassPipeline& backend_passes_pre_common,
const lowered::pass::PassPipeline& backend_passes_post_common,
const std::shared_ptr<IShapeInferSnippetsFactory>& factory,
}

const void* compile_params) {
data_flow_transformations(blocked_input_shapes, input_precisions, output_precisions, data_flow_backend_passes);
convert_body_to_linear_ir(factory);
Expand All @@ -500,7 +498,6 @@ snippets::Schedule Subgraph::generate_from_linear_ir(const lowered::pass::PassPi
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::generate")
OPENVINO_ASSERT(m_generator != nullptr, "generate is called while generator is not set");


// actual code emission
// Note: some transformations performed in the generator, e.g. tail insertion, can break shape propagation
// until we fix this behavior, we have to make a copy of LIR before giving it to the generator.
Expand Down

0 comments on commit 03752c5

Please sign in to comment.