Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

intermediate_buffer_output_share_memory #3

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class Generator;
* @brief Holds all relevant information produced during lowering
* @param compiled_snippet pointer to interface class that encapsulates compiled binary code
* @param buffer_scratchpad_size the amount of additional memory required by the binary code to execute.
* @param buffer_inplace_output index of the subgraph output whose memory the buffer shares: -1 means no sharing; i >= 0 means the buffer shares the memory of the i-th output.
* Must be allocated and freed by the backend.
*/
class LoweringResult {
Expand All @@ -35,6 +36,7 @@ class LoweringResult {
public:
std::shared_ptr<CompiledSnippet> compiled_snippet = nullptr;
size_t buffer_scratchpad_size = 0;
int buffer_inplace_output = -1;
};

/**
Expand Down
16 changes: 16 additions & 0 deletions src/common/snippets/include/snippets/lowered/linear_ir.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ class LinearIR {
*/
exprIt replace_with_expr(const std::vector<ExpressionPtr>& old_exprs, const ExpressionPtr& new_expr);

/**
* @brief Propagate start_expr through zero or more consecutive shape-infer exprs (such as Reshape, RankNormalization).
* @param start_expr Propagate from start_expr.
* @param downstream Propagate downstream if it's true, otherwise propagate upstream.
* @return shape infer op consumers as a sequence if downstream, or shape infer op sources as a sequence if upstream.
*/
static std::vector<ExpressionPtr> propagate_expr_through_shape_infer_ops(const ExpressionPtr& start_expr, bool downstream);

/**
* @brief Get the last shape-infer op reachable from start_expr in a sequence. If no shape-infer op is connected to start_expr, return start_expr.
* @param start_expr Search from start_expr.
* @param downstream search downstream if it's true, otherwise search upstream.
* @return last shape infer expr
*/
static ExpressionPtr get_last_shape_infer_expr(const ExpressionPtr& start_expr, bool downstream);

private:
std::shared_ptr<ShapeInferSnippetsNode> m_shape_infer = nullptr;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace pass {
class AllocateBuffers: public RangedPass {
public:
OPENVINO_RTTI("AllocateBuffers", "RangedPass")
AllocateBuffers(size_t& buffer_scratchpad_size, bool is_optimized = true);
AllocateBuffers(size_t& buffer_scratchpad_size, int& buffer_inplace_output, bool is_optimized = true);

/**
* @brief Apply the pass to the Linear IR
Expand All @@ -44,8 +44,10 @@ class AllocateBuffers: public RangedPass {

using BufferCluster = std::set<ExpressionPtr>;
using BufferClusters = std::vector<BufferCluster>;

private:
size_t& m_buffer_scratchpad_size;
int& m_buffer_inplace_output;
bool m_is_optimized_mode = true;
};

Expand Down
38 changes: 38 additions & 0 deletions src/common/snippets/include/snippets/op/reshape.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/op/op.hpp"
#include "snippets/shape_inference/shape_inference.hpp"

namespace ov {
namespace snippets {
namespace op {

/**
 * @interface Reshape
 * @brief Reshape input tensor to required target shape
 * @ingroup snippets
 */
class Reshape : public ov::op::Op {
public:
    OPENVINO_OP("Reshape", "SnippetsOpset");
    // Constructs a Reshape of input `x` to the given `target_shape`.
    Reshape(const Output<Node>& x, ov::PartialShape target_shape);
    Reshape() = default;

    bool visit_attributes(AttributeVisitor& visitor) override;
    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
    void validate_and_infer_types() override;

    // Accessors for the shape this op reshapes its input to.
    const ov::PartialShape& get_target_shape() const;
    void set_target_shape(ov::PartialShape shape);

private:
    // Shape reported on the output; empty until set via ctor/set_target_shape.
    ov::PartialShape m_target_shape = {};
};

} // namespace op
} // namespace snippets
} // namespace ov
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ class Subgraph : public ov::op::util::SubGraphOp {
// Return estimated unique buffer count (upper bound). It's needed for tokenization
static auto get_estimated_buffer_count(const ov::NodeVector& ops) -> size_t;
static auto is_domain_sensitive_op(const std::shared_ptr<ov::Node>& op) -> bool;
static auto is_shape_infer_op(const std::shared_ptr<ov::Node>& op) -> bool;
static auto get_last_shape_infer_op(const std::shared_ptr<ov::Node>& op, bool downstream) -> std::shared_ptr<ov::Node>;

void data_flow_transformations(const BlockedShapeVector& blocked_input_shapes = {},
const std::vector<ov::element::Type>& input_precisions = {},
Expand Down
27 changes: 27 additions & 0 deletions src/common/snippets/include/snippets/pass/gn_decomposition.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pattern/matcher.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
 * @interface GNDecomposition
 * @brief Decomposes GroupNormalization to a range of low-level operations
 * @ingroup snippets
 */
class GNDecomposition: public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("GNDecomposition", "0");
    // Registers the matcher and rewrite callback (defined in the .cpp).
    GNDecomposition();
};

} // namespace pass
} // namespace snippets
} // namespace ov
27 changes: 27 additions & 0 deletions src/common/snippets/include/snippets/pass/gn_tokenization.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/pattern/matcher.hpp"
#include "snippets/pass/tokenization.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
 * @interface TokenizeGNSnippets
 * @brief Tokenize GroupNormalization to a subgraph
 * @ingroup snippets
 */
class TokenizeGNSnippets : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("TokenizeGNSnippets", "0");
    // Registers the matcher and tokenization callback (defined in the .cpp).
    TokenizeGNSnippets();
};

} // namespace pass
} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,12 @@ class ReduceShapeInfer : public IShapeInferSnippets {
Result infer(const std::vector<VectorDimsRef>& input_shapes) override;
};

// Shape inference for snippets::op::Reshape; mirrors the sibling *ShapeInfer
// helpers above. The inference logic itself lives in the matching .cpp.
class ReshapeShapeInfer : public IShapeInferSnippets {
    // NOTE(review): presumably the Reshape node's target shape captured from `n`
    // at construction — confirm against the .cpp implementation.
    ov::PartialShape target_shape;
public:
    explicit ReshapeShapeInfer(const std::shared_ptr<Node>& n);
    Result infer(const std::vector<VectorDimsRef>& input_shapes) override;
};

} // namespace snippets
} // namespace ov
1 change: 1 addition & 0 deletions src/common/snippets/include/snippets/snippets_isa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "op/fill.hpp"
#include "op/kernel.hpp"
#include "op/load.hpp"
#include "op/reshape.hpp"
#include "op/nop.hpp"
#include "op/scalar.hpp"
#include "op/powerstatic.hpp"
Expand Down
1 change: 1 addition & 0 deletions src/common/snippets/include/snippets/snippets_isa_tbl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ OV_OP(LoopBegin, ov::snippets::op)
OV_OP(LoopEnd, ov::snippets::op)
OV_OP(Brgemm, ov::snippets::op)
OV_OP(BroadcastLoad, ov::snippets::op)
OV_OP(Reshape, ov::snippets::op)

OV_OP(Store, ov::snippets::op)

Expand Down
8 changes: 8 additions & 0 deletions src/common/snippets/include/snippets/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,14 @@ VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port);
* @return preordered shape: `shape[i]` = `planar_shape[order[i]]` where `shape` is shape before applying the order.
*/
VectorDims get_preordered_vdims(const snippets::lowered::ExpressionPort& expr_port);
/**
* @brief Returns element count of a shape
* @param shape input shape
* @return element count of input shape
*/
inline auto get_shape_size(const VectorDims& shape) -> size_t {
return std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
}
/* --------------------------- */

} // namespace utils
Expand Down
1 change: 1 addition & 0 deletions src/common/snippets/src/generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ RegType Generator::get_op_out_reg_type(const ov::Output<Node>& out) const {
std::dynamic_pointer_cast<op::IntermediateMemoryBuffer>(op) ||
std::dynamic_pointer_cast<op::NewMemoryBuffer>(op) ||
std::dynamic_pointer_cast<op::RankNormalization>(op) ||
std::dynamic_pointer_cast<op::Reshape>(op) ||
std::dynamic_pointer_cast<snippets::op::Store>(op)
#ifdef SNIPPETS_DEBUG_CAPS
|| std::dynamic_pointer_cast<op::PerfCountBeginBase>(op)
Expand Down
81 changes: 77 additions & 4 deletions src/common/snippets/src/lowered/linear_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "openvino/core/graph_util.hpp"
#include "openvino/core/type.hpp"
#include "snippets/utils.hpp"
#include "snippets/op/subgraph.hpp"

namespace ov {
namespace snippets {
Expand Down Expand Up @@ -365,10 +366,14 @@ VectorDims LinearIR::get_master_shape() const {
}
// Note: Snippets would benefit from a more generic master_shape calculation approach.
// It will be implemented in the scope of ROI propagation activity (ticket 120505)
const auto& source = out_exprs[0]->get_input_port_connector(0)->get_source();
if (!m_config.m_enable_domain_optimization && out_exprs.size() == 1 &&
ov::is_type<snippets::op::Brgemm>(source.get_expr()->get_node())) {
master_shape = utils::get_preordered_vdims(source);
if (out_exprs.size() == 1) {
const auto& source = out_exprs[0]->get_input_port_connector(0)->get_source();
if (!m_config.m_enable_domain_optimization && ov::is_type<snippets::op::Brgemm>(source.get_expr()->get_node())) {
master_shape = utils::get_preordered_vdims(source);
} else {
auto last_shape_infer_expr = LinearIR::get_last_shape_infer_expr(out_exprs[0], false);
master_shape = utils::get_preordered_vdims(last_shape_infer_expr->get_input_port_connector(0)->get_source());
}
} else {
for (const auto& oe : out_exprs) {
const auto& port_desc = oe->get_input_port_descriptor(0);
Expand Down Expand Up @@ -493,6 +498,74 @@ LinearIR::exprIt LinearIR::replace_with_expr(const std::vector<ExpressionPtr>& o
return replace_with_expr(old_exprs, new_expr, insertion_place);
}

std::vector<ExpressionPtr> LinearIR::propagate_expr_through_shape_infer_ops(const ExpressionPtr& start_expr, bool downstream) {
    // Collects the chain of consecutive shape-infer expressions reachable from
    // `start_expr` in the requested direction. `start_expr` itself is included
    // when it is a shape-infer op. The result is ordered by distance from
    // `start_expr` (nearest first), for both traversal directions.
    std::vector<ExpressionPtr> shape_infer_exprs;
    auto current_exp = start_expr;
    if (op::Subgraph::is_shape_infer_op(current_exp->get_node())) {
        shape_infer_exprs.push_back(current_exp);
    }
    if (downstream) {
        // No outputs => nothing further to follow.
        if (current_exp->get_output_count() == 0)
            return shape_infer_exprs;
        // Only output port 0 is walked: the chain is assumed to be single-port.
        auto consumers = current_exp->get_output_port_connector(0)->get_consumers();
        auto first_child = consumers.begin()->get_expr();
        while (op::Subgraph::is_shape_infer_op(first_child->get_node())) {
            // A shape-infer consumer must be the sole consumer, otherwise the
            // chain would fork and the propagation would be ambiguous.
            OPENVINO_ASSERT(consumers.size() == 1, "Shape infer ops are supposed to be the only consumer.");
            shape_infer_exprs.push_back(first_child);
            current_exp = first_child;
            if (current_exp->get_output_count() == 0)
                break;
            consumers = current_exp->get_output_port_connector(0)->get_consumers();
            first_child = consumers.begin()->get_expr();
        }
        return shape_infer_exprs;
    } else {
        // upstream: walk producers of input port 0 while they are shape-infer ops.
        if (current_exp->get_input_count() == 0)
            return shape_infer_exprs;
        auto first_source = current_exp->get_input_port_connector(0)->get_source().get_expr();
        while (op::Subgraph::is_shape_infer_op(first_source->get_node())) {
            shape_infer_exprs.push_back(first_source);
            current_exp = first_source;
            if (current_exp->get_input_count() == 0)
                break;
            first_source = current_exp->get_input_port_connector(0)->get_source().get_expr();
        }
        return shape_infer_exprs;
    }
}

ExpressionPtr LinearIR::get_last_shape_infer_expr(const ExpressionPtr& start_expr, bool downstream) {
    // Walks the chain of shape-infer expressions attached to `start_expr` and
    // returns the final one; returns `start_expr` itself when no shape-infer op
    // is connected in the requested direction.
    auto result = start_expr;
    while (true) {
        if (downstream) {
            // Follow the first consumer of output port 0 while it is a shape-infer op.
            if (result->get_output_count() == 0)
                return result;
            const auto consumers = result->get_output_port_connector(0)->get_consumers();
            const auto candidate = consumers.begin()->get_expr();
            if (!op::Subgraph::is_shape_infer_op(candidate->get_node()))
                return result;
            // A shape-infer consumer must be the sole consumer of the port.
            OPENVINO_ASSERT(consumers.size() == 1, "Shape infer ops are supposed to be the only consumer.");
            result = candidate;
        } else {
            // upstream: follow the producer of input port 0 while it is a shape-infer op.
            if (result->get_input_count() == 0)
                return result;
            const auto candidate = result->get_input_port_connector(0)->get_source().get_expr();
            if (!op::Subgraph::is_shape_infer_op(candidate->get_node()))
                return result;
            result = candidate;
        }
    }
}

LinearIR::LIRShapeInfer::LIRShapeInfer(container& body_exprs, io_container& io_exprs)
: ShapeInferSnippetsNode(),
m_exprs{std::make_shared<container>(body_exprs)} {
Expand Down
10 changes: 6 additions & 4 deletions src/common/snippets/src/lowered/pass/allocate_buffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ namespace snippets {
namespace lowered {
namespace pass {

AllocateBuffers::AllocateBuffers(size_t& buffer_scratchpad_size, bool is_optimized)
: m_buffer_scratchpad_size(buffer_scratchpad_size), m_is_optimized_mode(is_optimized) {}
// Binds the externally owned result slots the pass fills in during run():
// `buffer_scratchpad_size` (extra memory the kernel needs) and
// `buffer_inplace_output` (output index whose memory a buffer shares, -1 = none).
// `is_optimized` selects the optimized buffer-allocation mode.
// NOTE: initializers are listed in member declaration order
// (m_buffer_scratchpad_size, m_buffer_inplace_output, m_is_optimized_mode) so the
// list matches the actual initialization order and avoids a -Wreorder warning.
AllocateBuffers::AllocateBuffers(size_t& buffer_scratchpad_size, int& buffer_inplace_output, bool is_optimized)
    : m_buffer_scratchpad_size(buffer_scratchpad_size),
      m_buffer_inplace_output(buffer_inplace_output),
      m_is_optimized_mode(is_optimized) {}

void AllocateBuffers::set_buffer_offset(const ExpressionPtr& buffer_expr, const size_t offset) {
// If Buffer has offset We set this offset in the connected MemoryAccess ops
Expand All @@ -46,7 +46,8 @@ void AllocateBuffers::set_buffer_offset(const ExpressionPtr& buffer_expr, const
}
}
// Propagate to down: in Load. Buffer can have several Load
const auto& buffer_out = buffer_expr->get_output_port_connector(0);
auto last_shape_infer = ov::snippets::lowered::LinearIR::get_last_shape_infer_expr(buffer_expr, true);
const auto& buffer_out = last_shape_infer->get_output_port_connector(0);
for (const auto& child_expr_input : buffer_out->get_consumers()) {
const auto& child_expr = child_expr_input.get_expr();
const auto port = child_expr_input.get_index();
Expand All @@ -59,7 +60,7 @@ void AllocateBuffers::set_buffer_offset(const ExpressionPtr& buffer_expr, const
continue;
} else {
OPENVINO_THROW(
"Buffer::set_offset() was called when Buffer didn't have the corresponding MemoryAccess op for offset propagation");
"Buffer::set_offset() was called when Buffer didn't have the corresponding MemoryAccess op for offset propagation");
}
}
}
Expand All @@ -77,6 +78,7 @@ bool AllocateBuffers::run(lowered::LinearIR& linear_ir, lowered::LinearIR::const
pipeline.register_pass<SolveBufferMemory>(m_buffer_scratchpad_size, buffer_clusters);
pipeline.register_pass<NormalizeBufferIDs>();
pipeline.run(linear_ir);
m_buffer_inplace_output = 0;
} else {
InitBuffersDefault(m_buffer_scratchpad_size).run(linear_ir, linear_ir.cbegin(), linear_ir.cend());
}
Expand Down
Loading
Loading