Skip to content

Commit

Permalink
[Snippets][CPU] Applied Vladislav comments
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova committed Dec 30, 2024
1 parent ed31224 commit 9bb9646
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,6 @@ using namespace ov::snippets::utils;

namespace ov {
namespace intel_cpu {
namespace {
// Infers whether the B input of the brgemm expression is transposed from its
// input port layout: a layout whose last element is not the last dimension
// index means the N dimension has been moved, i.e. the input is transposed.
bool get_is_transposed(const ov::snippets::lowered::ExpressionPtr& expr) {
    const auto& layout = expr->get_input_port_descriptor(0)->get_layout();
    const auto is_transposed = !layout.empty() && layout.back() != layout.size() - 1;
    // Guard the rank before reading `layout[layout.size() - 2]`: for a rank-1
    // transposed layout the index would wrap around (size_t) and read out of
    // bounds. Only layouts with N as the last or pre-last dim are supported.
    OV_CPU_JIT_EMITTER_ASSERT(
        IMPLICATION(is_transposed, layout.size() >= 2 && layout[layout.size() - 2] == layout.size() - 1),
        "supports only N dim placed as last or pre last dimension");
    return is_transposed;
}
}  // namespace

jit_brgemm_copy_b_emitter::jit_brgemm_copy_b_emitter(jit_generator* h,
cpu_isa_t isa,
Expand All @@ -50,7 +41,7 @@ jit_brgemm_copy_b_emitter::jit_brgemm_copy_b_emitter(jit_generator* h,
const auto& src_prc = brgemm_repack->get_src_element_type();
const auto& wei_prc = brgemm_repack->get_input_element_type(0);
const auto wei_N_blk = brgemm_utils::repacking::compute_inner_n_block(wei_prc);
const auto is_transposed = get_is_transposed(expr);
const auto is_transposed = BrgemmCopyB::is_transposed(expr->get_input_port_descriptor(0)->get_layout());
const auto brgemm_type = get_brgemm_type(src_prc, is_transposed);
const auto primitive_isa = brgemm_utils::get_primitive_isa(src_prc, with_amx(brgemm_type));
m_with_comp = with_compensations(brgemm_type);
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ Subgraph::SubgraphExecutor::SubgraphExecutor(const std::shared_ptr<Subgraph::Sub
// each thread should have own buffer to store repacked data
external_buffer_size *= m_nthreads;

// To avoid extra overheads in runtime on unordered_map creation,
// To avoid extra overheads in runtime on vector creation,
// we initialize `repacked_offsets_by_threads` by default here
m_repacked_offsets_by_threads.resize(m_nthreads);
for (size_t i = 0; i < m_repacked_offsets_by_threads.size(); ++i)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ size_t BrgemmCopyB::get_offset_compensations() const {
return get_output_offset(1);
}

bool BrgemmCopyB::is_transposed(const std::vector<size_t>& layout) {
    // A layout whose last element is not the last dimension index means the
    // N dimension has been moved, i.e. the B input is transposed.
    const auto is_transposed = !layout.empty() && layout.back() != layout.size() - 1;
    // Guard the rank before reading `layout[layout.size() - 2]`: for a rank-1
    // transposed layout the index would wrap around (size_t) and read out of
    // bounds. Only layouts with N as the last or pre-last dim are supported.
    OPENVINO_ASSERT(
        IMPLICATION(is_transposed, layout.size() >= 2 && layout[layout.size() - 2] == layout.size() - 1),
        "supports only N dim placed as last or pre last dimension");
    return is_transposed;
}

BrgemmCopyB::ShapeInfer::ShapeInfer(const std::shared_ptr<ov::Node>& n) {
const auto& brg_copyb = ov::as_type_ptr<BrgemmCopyB>(n);
OPENVINO_ASSERT(brg_copyb, "Got invalid node in BrgemmCopyB::ShapeInfer");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class BrgemmCopyB : public snippets::modifier::MemoryAccess, public ov::op::Op {
Result infer(const std::vector<snippets::VectorDimsRef>& input_shapes) override;
};

static bool is_transposed(const std::vector<size_t>& layout);

private:
void custom_constructor_validate_and_infer_types(std::vector<size_t> layout_input = {});
void validate_element_type(const ov::element::Type& element_type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ pass::EliminateBrgemmCopyB::EliminateBrgemmCopyB() {
return false;

// If there is non-empty and non-planar layout, we should insert reshape to support shape inference
if (!layout.empty() && !ov::snippets::utils::is_planar_layout(layout)) {
if (!ov::snippets::utils::is_planar_layout(layout)) {
const auto& subtensor = in_desc->get_subtensor();
const auto& reshape =
std::make_shared<ov::snippets::op::ReshapeWithOrder>(copy_b_node->input_value(0), layout);
ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(reshape->input(0), subtensor, layout);
ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(reshape->output(0), subtensor);
ov::replace_node(copy_b_node, reshape);
ov::replace_node_update_name(copy_b_node, reshape);
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
namespace ov {
namespace intel_cpu {

// Out-of-line definition of the in-class `static const` member declared in the
// header. Rank 2 corresponds to the two innermost dims (K x N) that the brgemm
// copy kernel operates on — see get_blk_shape(), which strips exactly this many
// trailing dimensions before appending the blocked (K, N, vnni) triple.
const size_t BrgemmExternalRepackingAdjuster::brgemm_kernel_rank = 2;

BrgemmExternalRepackingAdjuster::BrgemmExternalRepackingAdjuster(const ov::snippets::lowered::LinearIRCPtr& linear_ir,
const CPURuntimeConfigurator* configurator)
: snippets::lowered::pass::RuntimeOptimizer(configurator) {
Expand All @@ -32,7 +34,9 @@ BrgemmExternalRepackingAdjuster::BrgemmExternalRepackingAdjuster(const ov::snipp
const auto wei_prc = brgemm->get_input_element_type(1);
const auto isa = brgemm_utils::get_primitive_isa(src_prc, brgemm_utils::with_amx(brgemm->get_type()));
const auto inner_n_block = brgemm_utils::repacking::compute_inner_n_block(wei_prc);
auto config = BrgemmCopyBKernelConfig(src_prc, wei_prc, isa, false, false, inner_n_block);
const auto is_transposed_b =
BrgemmCopyB::is_transposed(m_configurator->get_io_descs()[i]->get_layout());
auto config = BrgemmCopyBKernelConfig(src_prc, wei_prc, isa, false, is_transposed_b, inner_n_block);
m_executors[i] = std::make_shared<BrgemmCopyBKernelExecutor>(
static_cast<const CPURuntimeConfigurator*>(m_configurator)->get_cache(),
config);
Expand All @@ -49,13 +53,13 @@ VectorDims BrgemmExternalRepackingAdjuster::get_blk_order(size_t shape_rank) {
return order;
}

// Builds the blocked (repacked) shape of the B input from its planar shape:
// the two innermost dims (K, N) are replaced by (ceil(K / vnni), N padded up
// to the inner N block, vnni), so the repacking kernel always sees full blocks.
VectorDims BrgemmExternalRepackingAdjuster::get_blk_shape(const VectorDims& planar_shape, ov::element::Type prc) {
    const auto vnni_factor = brgemm_utils::compute_vnni_factor(prc);
    // Innermost two dims of the planar shape are [..., K, N].
    const auto K = *++planar_shape.rbegin();
    const auto N = *planar_shape.rbegin();
    const auto new_K = snippets::utils::div_up(K, vnni_factor);
    const auto new_N = std::max(N, brgemm_utils::repacking::compute_inner_n_block(prc));
    // Keep all batch dims, drop the last `brgemm_kernel_rank` (= 2) dims and
    // append the blocked triple.
    VectorDims blk_shape(planar_shape.begin(), planar_shape.end() - brgemm_kernel_rank);
    blk_shape.insert(blk_shape.end(), {new_K, new_N, vnni_factor});
    return blk_shape;
}
Expand All @@ -66,9 +70,10 @@ void BrgemmExternalRepackingAdjuster::update_kernel(const RepackExecutorPtr& exe
size_t N,
size_t K,
ov::element::Type prc) {
const auto copy_wei_stride = ov::snippets::utils::get_dim_in_stride(shape, layout, 1) * prc.size();
const auto generic_config = executor->get_config().get_clone_ptr();
auto config = static_cast<BrgemmCopyBKernelConfig*>(generic_config.get());
const auto idx = config->is_transposed_B() ? 0 : 1;
const auto copy_wei_stride = ov::snippets::utils::get_dim_in_stride(shape, layout, idx) * prc.size();
config->update(N, N, K, K, copy_wei_stride, brgemm_utils::repacking::compute_LDB(N, prc));
executor->update_by_config(*config);
}
Expand Down Expand Up @@ -125,6 +130,10 @@ bool BrgemmExternalRepackingAdjuster::run(const snippets::lowered::LinearIR& lin
const auto desc = std::make_shared<CpuBlockedMemoryDesc>(prc, Shape(planar_shape), blk_shape, order);

// Save original input offsets for input before repacking.
// If the shape has not been changed, it means that we already created `RepackedInput` for this input
// on previous pass call and now `cpu_config->io_data_offsets[i]` contains offsets not for original input -
// they were updated for blocked shapes/zeroed during the previous initialization and we cannot use them as original
// offsets.
const auto in_offsets =
shape == cpu_config->latest_shapes[i] ? repacked_in.in_offsets() : cpu_config->io_data_offsets[i];

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class BrgemmExternalRepackingAdjuster : public ov::snippets::lowered::pass::Runt
private:
using RepackExecutorPtr = std::shared_ptr<BrgemmCopyBKernelExecutor>;
static VectorDims get_blk_order(size_t shape_rank);
static VectorDims get_blk_shape(const VectorDims& shape, ov::element::Type prc);
static VectorDims get_blk_shape(const VectorDims& planar_shape, ov::element::Type prc);

void update_kernel(const RepackExecutorPtr& executor,
const VectorDims& shape,
Expand All @@ -39,7 +39,7 @@ class BrgemmExternalRepackingAdjuster : public ov::snippets::lowered::pass::Runt
size_t K,
ov::element::Type prc);

const static size_t brgemm_kernel_rank = 2;
static const size_t brgemm_kernel_rank;
std::unordered_map<size_t, RepackExecutorPtr> m_executors;
};

Expand Down

0 comments on commit 9bb9646

Please sign in to comment.