From 982e2c25d99c2a71dcd52a7d75e3b8a72592ddf7 Mon Sep 17 00:00:00 2001 From: chenhu-wang Date: Tue, 14 Jan 2025 04:03:58 +0800 Subject: [PATCH] refactor tpp on x64 and aarch64 --- src/plugins/intel_cpu/CMakeLists.txt | 8 ++- .../snippets/aarch64/cpu_generator.cpp | 9 ++- .../snippets/aarch64/cpu_generator.hpp | 2 +- .../src/emitters/snippets/brgemm_base.cpp | 2 +- .../emitters/snippets/x64/cpu_generator.cpp | 4 +- .../aarch64/jit_brgemm_emitter.cpp | 7 ++- .../aarch64/jit_brgemm_emitter.hpp | 7 ++- .../aarch64/kernel_executors/brgemm.cpp | 7 ++- .../aarch64/kernel_executors/brgemm.hpp | 0 .../src/emitters/tpp/common/utils.hpp | 36 +++++++++++ .../emitters/tpp/x64/jit_brgemm_emitter.cpp | 7 ++- .../src/emitters/tpp/x64/jit_tpp_emitter.cpp | 17 +---- .../src/emitters/tpp/x64/jit_tpp_emitter.hpp | 1 - src/plugins/intel_cpu/src/emitters/utils.hpp | 19 ------ src/plugins/intel_cpu/src/nodes/subgraph.cpp | 43 ++++++------- .../snippets/aarch64/shape_inference.cpp | 2 +- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 2 +- .../snippets/x64/shape_inference.cpp | 2 +- .../pass/lowered/brgemm_tpp_blocking.cpp | 63 +++++++++++++++++++ .../pass/lowered/brgemm_tpp_blocking.hpp | 55 ++++++++++++++++ .../tpp/{x64 => common}/op/brgemm.cpp | 0 .../tpp/{x64 => common}/op/brgemm.hpp | 2 +- .../tpp/{x64 => common}/op/modifiers.hpp | 0 .../pass/brgemm_to_brgemm_tpp.cpp | 16 ++--- .../pass/brgemm_to_brgemm_tpp.hpp | 0 .../pass/lowered/set_tpp_leading_dim.cpp | 9 +-- .../pass/lowered/set_tpp_leading_dim.hpp | 0 .../transformations/tpp/x64/op/eltwise.hpp | 2 +- .../transformations/tpp/x64/op/equation.hpp | 2 +- .../src/transformations/tpp/x64/op/reduce.hpp | 3 +- .../src/transformations/tpp/x64/op/scalar.cpp | 1 - .../src/transformations/tpp/x64/op/scalar.hpp | 2 +- .../x64/pass/lowered/brgemm_tpp_blocking.cpp | 4 +- .../x64/pass/lowered/brgemm_tpp_blocking.hpp | 4 +- .../tpp/x64/pass/scalar_to_scalar_tpp.cpp | 2 +- .../transformation_pipeline.cpp | 7 +-- .../x64/lowered/brgemm_blocking.cpp | 6 
+- .../intel_cpu/thirdparty/CMakeLists.txt | 3 +- 38 files changed, 248 insertions(+), 108 deletions(-) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/jit_brgemm_emitter.cpp (91%) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/jit_brgemm_emitter.hpp (80%) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/kernel_executors/brgemm.cpp (97%) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/kernel_executors/brgemm.hpp (100%) create mode 100644 src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp create mode 100644 src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp create mode 100644 src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/op/brgemm.cpp (100%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/op/brgemm.hpp (96%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/op/modifiers.hpp (100%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/brgemm_to_brgemm_tpp.cpp (98%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/brgemm_to_brgemm_tpp.hpp (100%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/lowered/set_tpp_leading_dim.cpp (99%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/lowered/set_tpp_leading_dim.hpp (100%) diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 97bf72f77bac76..eb07b2e57cb3fc 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -160,7 +160,7 @@ if(ENABLE_CPU_DEBUG_CAPS) add_definitions(-DCPU_DEBUG_CAPS) endif() -if(AARCH64 OR ARM) +if(AARCH64 AND (NOT ANDROID)) set(ENABLE_SNIPPETS_LIBXSMM_TPP ON) endif() @@ -202,7 +202,9 @@ if(NOT X86_64) ${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/kernels/x64/* 
${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/plugin/x64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/snippets/x64/* - ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/x64/*) + ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/tpp/x64/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/x64/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/tpp/x64/*) endif() if (AARCH64) @@ -212,7 +214,9 @@ endif() if(NOT (AARCH64 OR ARM)) list(APPEND EXCLUDE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/arm/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/tpp/aarch64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/plugin/aarch64/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/tpp/aarch64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/executors/aarch64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/kernels/aarch64/*) endif() diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp index 841dd1b19fbeda..48f71a4491aeb0 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp @@ -6,7 +6,6 @@ #include "emitters/plugin/aarch64/jit_conversion_emitters.hpp" #include "emitters/plugin/aarch64/jit_eltwise_emitters.hpp" -#include "emitters/snippets/aarch64/jit_brgemm_emitter.hpp" #include "emitters/snippets/aarch64/jit_fill_emitter.hpp" #include "emitters/snippets/aarch64/jit_kernel_emitter.hpp" #include "emitters/snippets/aarch64/jit_loop_emitters.hpp" @@ -25,7 +24,11 @@ #include "snippets/snippets_isa.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" + +#ifdef SNIPPETS_LIBXSMM_TPP +# include "emitters/tpp/aarch64/jit_brgemm_emitter.hpp" +# include "transformations/tpp/common/op/brgemm.hpp" +#endif namespace ov { @@ -205,9 +208,11 @@ 
CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(jit_swish_emitter); jitters[ov::op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(jit_tanh_emitter); +#ifdef SNIPPETS_LIBXSMM_TPP // brgemm jitters[ov::intel_cpu::tpp::op::BrgemmTPP::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_brgemm_emitter, configurator->get_kernel_executor_table(), compiled_kernel_cache); +#endif // control flow jitters[snippets::op::KernelStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_static_emitter); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp index a120e7cf424ac3..90c2662e33d070 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024-2025 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // diff --git a/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp index 2c2c7d50d3ef6b..9bc99888463f24 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp @@ -262,6 +262,7 @@ void BrgemmBaseKernelExecutor::update_config(const ov::snippets::lowered::Expres // In case of data repacking LDB is chosen in accordance with repacking buffer size if (with_repacking(brgemm_node->get_type())) LDB = DIM_CAST(brgemm_utils::repacking::compute_LDB(LDB, brgemm_node->get_input_element_type(1))); + config.update(DIM_CAST(M), DIM_CAST(N), DIM_CAST(K), LDA, LDB, LDC, beta); } @@ -327,7 +328,6 @@ void BrgemmBaseKernelExecutor::execute_brgemm_kernel( brgemm_p.do_post_ops = with_comp; brgemm_p.do_apply_comp = with_comp; 
brgemm_p.skip_accm = 0; - brgemm_p.BS = 1; // default value OV_CPU_JIT_EMITTER_ASSERT(kernel, "has nullptr Brgemm kernel"); (*kernel)(&brgemm_p); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp index 6bc05dbc43b41d..5c0882c6b9666c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp @@ -45,10 +45,10 @@ # include "emitters/tpp/x64/jit_eltwise_emitters.hpp" # include "emitters/tpp/x64/jit_equation_emitter.hpp" # include "emitters/tpp/x64/jit_scalar_emitter.hpp" -# include "transformations/tpp/x64/op/brgemm.hpp" +# include "transformations/tpp/common/op/brgemm.hpp" +# include "transformations/tpp/common/op/modifiers.hpp" # include "transformations/tpp/x64/op/eltwise.hpp" # include "transformations/tpp/x64/op/equation.hpp" -# include "transformations/tpp/x64/op/modifiers.hpp" # include "transformations/tpp/x64/op/reduce.hpp" # include "transformations/tpp/x64/op/scalar.hpp" // Note: for reference implementations diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.cpp similarity index 91% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp rename to src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.cpp index 5d705a755ca492..ac57ebbad42ab7 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.cpp @@ -5,7 +5,7 @@ #include "jit_brgemm_emitter.hpp" #include "snippets/utils/utils.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" using namespace Xbyak_aarch64; @@ -42,7 +42,10 @@ void jit_brgemm_emitter::validate_arguments(const std::vector& in, const 
OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Expects 1 output reg, got" + std::to_string(out.size())); } -void jit_brgemm_emitter::emit_code(const std::vector& in, const std::vector& out) const { +void jit_brgemm_emitter::emit_code(const std::vector& in, + const std::vector& out, + const std::vector& pool_vec_idxs, + const std::vector& pool_gpr_idxs) const { validate_arguments(in, out); emit_impl(in, out); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.hpp similarity index 80% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp rename to src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.hpp index 83e46631ac8030..d98e97800e4b6e 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.hpp @@ -5,7 +5,7 @@ #pragma once #include "emitters/plugin/aarch64/jit_emitter.hpp" -#include "emitters/snippets/aarch64/kernel_executors/brgemm.hpp" +#include "emitters/tpp/aarch64/kernel_executors/brgemm.hpp" namespace ov { namespace intel_cpu { @@ -26,7 +26,10 @@ class jit_brgemm_emitter : public jit_emitter { static std::set> get_supported_precisions( const std::shared_ptr& node = nullptr); - void emit_code(const std::vector& in, const std::vector& out) const; + void emit_code(const std::vector& in_idxs, + const std::vector& out_idxs, + const std::vector& pool_vec_idxs = {}, + const std::vector& pool_gpr_idxs = {}) const override; private: void validate_arguments(const std::vector& in, const std::vector& out) const override; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.cpp similarity index 97% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp rename to 
src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.cpp index 05d8fc4c5939ff..ae3a77022f3a31 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.cpp @@ -4,7 +4,8 @@ #include "brgemm.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "emitters/tpp/common/utils.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" using namespace Xbyak; using namespace dnnl::impl; @@ -34,8 +35,8 @@ BrgemmKernelConfig::StaticParams::StaticParams(const element::Type& in0_dtype, const element::Type& in1_dtype, dnnl::impl::cpu::aarch64::cpu_isa_t primitive_isa) : StaticBaseParams(in0_dtype, in1_dtype, dnnl::impl::cpu::x64::cpu_isa_t::isa_undef, compute_hash(primitive_isa)) { - m_type_in0 = ov_to_xsmm_dtype(in0_dtype); - m_type_in1 = ov_to_xsmm_dtype(in1_dtype); + m_type_in0 = tpp::ov_to_xsmm_dtype(in0_dtype); + m_type_in1 = tpp::ov_to_xsmm_dtype(in1_dtype); m_type_exec = LIBXSMM_DATATYPE_F32; m_type_out0 = LIBXSMM_DATATYPE_F32; m_compile_flags = LIBXSMM_GEMM_FLAGS('N', 'N'); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.hpp similarity index 100% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp rename to src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.hpp diff --git a/src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp b/src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp new file mode 100644 index 00000000000000..f4778a0b7553f3 --- /dev/null +++ b/src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "emitters/utils.hpp" +#include "libxsmm.h" + +namespace ov { +namespace intel_cpu { +namespace tpp { + 
+inline libxsmm_datatype ov_to_xsmm_dtype(ov::element::Type_t element_type) { + switch (element_type) { + case ov::element::Type_t::f32: + return LIBXSMM_DATATYPE_F32; + case ov::element::Type_t::bf16: + return LIBXSMM_DATATYPE_BF16; + case ov::element::Type_t::f16: + return LIBXSMM_DATATYPE_F16; + case ov::element::Type_t::i8: + return LIBXSMM_DATATYPE_I8; + case ov::element::Type_t::u8: + return LIBXSMM_DATATYPE_U8; + default: + OV_CPU_JIT_EMITTER_THROW("Attempt to convert unsupported ov data type"); + return LIBXSMM_DATATYPE_IMPLICIT; + } +} + +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp index 0fcb394a8a5bde..ef7bc2638fa454 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp @@ -4,7 +4,8 @@ #include "jit_brgemm_emitter.hpp" #include "emitters/snippets/x64/jit_snippets_emitters.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "emitters/tpp/common/utils.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" using jit_generator = dnnl::impl::cpu::x64::jit_generator; using cpu_isa_t = dnnl::impl::cpu::x64::cpu_isa_t; @@ -32,8 +33,8 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres brgemm_node->get_input_stride(1), brgemm_node->get_output_stride(0)}; - auto in_0_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(0)); - auto in_1_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(1)); + auto in_0_prec = tpp::ov_to_xsmm_dtype(brgemm_node->get_input_element_type(0)); + auto in_1_prec = tpp::ov_to_xsmm_dtype(brgemm_node->get_input_element_type(1)); exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ? 
LIBXSMM_DATATYPE_I32 : LIBXSMM_DATATYPE_F32; diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp index cb18f69082e1b2..19b4f8c7f40b52 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp @@ -6,6 +6,7 @@ #include "snippets/lowered/port_descriptor.hpp" #include "transformations/tpp/x64/op/eltwise.hpp" #include "emitters/plugin/x64/utils.hpp" +#include "emitters/tpp/common/utils.hpp" using namespace Xbyak; using namespace dnnl::impl; @@ -55,7 +56,7 @@ TppEmitter::TppEmitter(dnnl::impl::cpu::x64::jit_generator* h, }; for (size_t i = 0; i < num_ins; i++) { - io_dtypes[i] = ov_to_xsmm_dtype(node->get_input_element_type(i)); + io_dtypes[i] = tpp::ov_to_xsmm_dtype(node->get_input_element_type(i)); io_offsets[i] = tpp_mod->get_input_offset(i); io_strides[i] = replace_full_dim(tpp_mod->get_input_stride(i), expr->get_input_port_descriptor(i)->get_shape().back()); @@ -64,7 +65,7 @@ TppEmitter::TppEmitter(dnnl::impl::cpu::x64::jit_generator* h, for (size_t i = 0; i < num_outs; i++) { const auto i_off = i + num_ins; - io_dtypes[i_off] = ov_to_xsmm_dtype(node->get_output_element_type(i)); + io_dtypes[i_off] = tpp::ov_to_xsmm_dtype(node->get_output_element_type(i)); io_offsets[i_off] = tpp_mod->get_output_offset(i); io_strides[i_off] = replace_full_dim(tpp_mod->get_output_stride(i), expr->get_output_port_descriptor(i)->get_shape().back()); @@ -114,17 +115,5 @@ void TppEmitter::emit_impl(const std::vector& in, const std::vector &in, const std::vector &out) const; - static libxsmm_datatype ov_to_xsmm_dtype(ov::element::Type_t elemet_type); protected: void emit_impl(const std::vector& in, diff --git a/src/plugins/intel_cpu/src/emitters/utils.hpp b/src/plugins/intel_cpu/src/emitters/utils.hpp index 7d4d5318062427..92df781a1fc318 100644 --- a/src/plugins/intel_cpu/src/emitters/utils.hpp +++ 
b/src/plugins/intel_cpu/src/emitters/utils.hpp @@ -6,7 +6,6 @@ #include -#include "libxsmm.h" #include "openvino/core/except.hpp" #include "openvino/core/type/element_type.hpp" @@ -24,23 +23,5 @@ std::string jit_emitter_pretty_name(const std::string& pretty_func); #define OV_CPU_JIT_EMITTER_THROW(...) OPENVINO_THROW(OV_CPU_JIT_EMITTER_NAME, ": ", __VA_ARGS__) #define OV_CPU_JIT_EMITTER_ASSERT(cond, ...) OPENVINO_ASSERT((cond), OV_CPU_JIT_EMITTER_NAME, ": ", __VA_ARGS__) -inline libxsmm_datatype ov_to_xsmm_dtype(ov::element::Type_t elemet_type) { - switch (elemet_type) { - case ov::element::Type_t::f32: - return LIBXSMM_DATATYPE_F32; - case ov::element::Type_t::bf16: - return LIBXSMM_DATATYPE_BF16; - case ov::element::Type_t::f16: - return LIBXSMM_DATATYPE_F16; - case ov::element::Type_t::i8: - return LIBXSMM_DATATYPE_I8; - case ov::element::Type_t::u8: - return LIBXSMM_DATATYPE_U8; - default: - OV_CPU_JIT_EMITTER_THROW("Attempt to convert unsupported ov data type"); - return LIBXSMM_DATATYPE_IMPLICIT; - } -} - } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index a13d00ea9a9b91..7d842f349f5278 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -51,12 +51,16 @@ #ifdef SNIPPETS_LIBXSMM_TPP # include "snippets/lowered/pass/optimize_domain.hpp" -# include "transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp" -# include "transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp" -# include "transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp" -# include "transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp" -# include "transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp" -# include "transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp" +# include "transformations/tpp/common/pass/brgemm_to_brgemm_tpp.hpp" +# include "transformations/tpp/common/pass/lowered/set_tpp_leading_dim.hpp" +# if 
defined(OPENVINO_ARCH_ARM64) +# include "transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp" +# else +# include "transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp" +# include "transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp" +# include "transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp" +# include "transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp" +# endif #endif namespace ov { @@ -492,9 +496,6 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { ov::intel_cpu::pass::EliminateBrgemmCopyB); SNIPPETS_REGISTER_PASS_ABSOLUTE_X86_64(Place::PipelineEnd, ov::intel_cpu::pass::RemoveConverts); SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON(Place::PipelineEnd, ov::intel_cpu::pass::MulAddToFMA); - SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::Before, - ov::snippets::pass::PropagatePrecision, - ov::intel_cpu::tpp::pass::BrgemmToBrgemmTPP); #ifdef SNIPPETS_LIBXSMM_TPP SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::Before, @@ -508,6 +509,9 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::After, ov::intel_cpu::tpp::pass::EltwiseToEltwiseTPP, ov::intel_cpu::tpp::pass::FuseTPPToEquations); + SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::Before, + ov::snippets::pass::PropagatePrecision, + ov::intel_cpu::tpp::pass::BrgemmToBrgemmTPP); #endif #undef SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON @@ -522,11 +526,10 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const { ControlFlowPasses backend_passes; +#if defined(OPENVINO_ARCH_X86_64) || defined(OPENVINO_ARCH_ARM64) using PassPosition = ov::snippets::pass::PassPosition; using Place = PassPosition::Place; -#define SNIPPETS_REGISTER_PASS_RELATIVE_COMMON(PASS_PLACE, TARGET_PASS, PASS, ...) 
\ - backend_passes.emplace_back(PassPosition(PASS_PLACE, TARGET_PASS::get_type_info_static()), \ - std::make_shared(__VA_ARGS__)) +#endif #if defined(OPENVINO_ARCH_X86_64) # define SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(PASS_PLACE, TARGET_PASS, PASS, ...) \ @@ -559,23 +562,21 @@ Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const { ov::snippets::lowered::pass::InsertBuffers, ov::intel_cpu::pass::InsertBrgemmCopyBuffers); - SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, - ov::snippets::lowered::pass::MarkLoops, - ov::intel_cpu::tpp::pass::BrgemmTPPBlocking); - SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, - ov::snippets::lowered::pass::InsertLoops, - ov::intel_cpu::tpp::pass::SetTPPLeadingDim); - #ifdef SNIPPETS_LIBXSMM_TPP SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::Before, ov::intel_cpu::pass::BrgemmCPUBlocking, - ov::intel_cpu::tpp::pass::BrgemmTPPBlocking); + ov::intel_cpu::tpp::pass::x64::BrgemmTPPBlocking); SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::After, ov::intel_cpu::pass::FuseLoadStoreConvert, ov::intel_cpu::tpp::pass::SetTPPLeadingDim); + SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, + ov::snippets::lowered::pass::MarkLoops, + ov::intel_cpu::tpp::pass::aarch64::BrgemmTPPBlocking); + SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, + ov::snippets::lowered::pass::InsertLoops, + ov::intel_cpu::tpp::pass::SetTPPLeadingDim); #endif -#undef SNIPPETS_REGISTER_PASS_RELATIVE_COMMON #undef SNIPPETS_REGISTER_PASS_RELATIVE_X86_64 #undef SNIPPETS_REGISTER_PASS_RELATIVE_ARM64 return backend_passes; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp index ab07227c328906..041dedb06f8896 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp @@ -7,7 +7,7 @@ #include 
"snippets/shape_inference/shape_infer_instances.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" namespace ov { namespace snippets { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 48456b8220300a..0d0176c164e5d2 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -16,7 +16,7 @@ #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" #include "transformations/snippets/x64/op/brgemm_utils.hpp" -#include "transformations/tpp/x64/op/modifiers.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" #include "utils/general_utils.h" namespace ov { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp index 50a2399e93ecc4..14d72c3d3d0969 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp @@ -14,7 +14,7 @@ #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" #ifdef SNIPPETS_LIBXSMM_TPP -# include "transformations/tpp/x64/op/brgemm.hpp" +# include "transformations/tpp/common/op/brgemm.hpp" # include "transformations/tpp/x64/op/equation.hpp" # include "transformations/tpp/x64/op/reduce.hpp" # include "transformations/tpp/x64/op/scalar.hpp" diff --git a/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp 
b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp new file mode 100644 index 00000000000000..70fa593f0e4a92 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "brgemm_tpp_blocking.hpp" + +#include "snippets/itt.hpp" +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/snippets_isa.hpp" +#include "snippets/utils/utils.hpp" + +namespace ov { +namespace intel_cpu { +namespace tpp { +namespace pass { +namespace aarch64 { +using namespace ov::snippets::utils; + +bool BrgemmTPPBlocking::SetBrgemmBeta::run(ov::snippets::lowered::LinearIR& linear_ir, + ov::snippets::lowered::LinearIR::constExprIt begin, + ov::snippets::lowered::LinearIR::constExprIt end) { + for (auto expr_it = begin; expr_it != end; ++expr_it) { + if (const auto brgemm = ov::as_type_ptr(expr_it->get()->get_node())) + brgemm->set_beta(0); + } + return true; +} + +std::shared_ptr BrgemmTPPBlocking::SetBrgemmBeta::merge( + const std::shared_ptr& other) { + return !other || ov::is_type(other) ? std::make_shared() : nullptr; +} + +std::tuple BrgemmTPPBlocking::get_blocking_params( + const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const { + size_t m, n, k; + std::tie(m, n, k) = get_brgemm_dimensions(brgemm_expr); + OPENVINO_ASSERT(!is_dynamic_value(m) && !is_dynamic_value(n) && !is_dynamic_value(k), + "BrgemmTPP doesn't support dynamic shapes"); + + size_t m_blk, n_blk, k_blk; + std::tie(m_blk, n_blk, k_blk) = BrgemmBlockingBase::get_blocking_params(brgemm_expr); + + auto get_projected_blk = [](const size_t dim, const size_t blk) { + return ov::snippets::utils::is_full_dim_value(blk) ? 
dim : blk; + }; + return std::make_tuple(get_projected_blk(m, m_blk), get_projected_blk(n, n_blk), get_projected_blk(k, k_blk)); +} + +ov::snippets::lowered::SpecificIterationHandlers BrgemmTPPBlocking::get_k_loop_handlers(size_t work_amount, + size_t block_size) const { + ov::snippets::lowered::SpecificIterationHandlers handlers = + ov::snippets::lowered::pass::BrgemmBlockingBase::get_k_loop_handlers(work_amount, block_size); + handlers.register_pass(); + return handlers; +} + +} // namespace aarch64 +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp new file mode 100644 index 00000000000000..5df6f198e34533 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp @@ -0,0 +1,55 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "snippets/lowered/pass/brgemm_blocking.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" + +namespace ov { +namespace intel_cpu { +namespace tpp { +namespace pass { +namespace aarch64 { +/** + * @interface BrgemmTPPBlocking + * @brief Covers BrgemmTPP with blocking loops + * @ingroup snippets + */ + +class BrgemmTPPBlocking : public ov::snippets::lowered::pass::BrgemmBlocking { +public: + OPENVINO_RTTI("BrgemmTPPBlocking", + "tpp::op::BrgemmTPP", + snippets::lowered::pass::BrgemmBlocking); + + /** + * @interface SetBrgemmBeta + * @brief The pass set `beta = 0` to BrgemmTPP. 
+ * Note: the pass is in public section to have opportunity to validate blocking loop in tests + * @ingroup snippets + */ + class SetBrgemmBeta : public snippets::lowered::pass::RangedPass { + public: + OPENVINO_RTTI("SetBrgemmBeta", "0", snippets::lowered::pass::RangedPass); + SetBrgemmBeta() = default; + bool run(ov::snippets::lowered::LinearIR& linear_ir, + ov::snippets::lowered::LinearIR::constExprIt begin, + ov::snippets::lowered::LinearIR::constExprIt end) override; + std::shared_ptr merge( + const std::shared_ptr& other) override; + }; + +private: + std::tuple get_blocking_params( + const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const override; + ov::snippets::lowered::SpecificIterationHandlers get_k_loop_handlers(size_t work_amount, + size_t block_size) const override; +}; + +} // namespace aarch64 +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp b/src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.cpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.cpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.hpp similarity index 96% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.hpp index c9199c3c7f82df..1729262f4a4f7a 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.hpp @@ -4,8 +4,8 @@ #pragma once -#include "transformations/snippets/x64/op/brgemm_cpu.hpp" #include "modifiers.hpp" +#include "snippets/op/brgemm.hpp" namespace ov { namespace intel_cpu { diff --git 
a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/op/modifiers.hpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/op/modifiers.hpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.cpp similarity index 98% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.cpp index 53992b1e67da9c..b69a69aa24847a 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.cpp @@ -2,22 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" - #include "brgemm_to_brgemm_tpp.hpp" -#include "snippets/utils/utils.hpp" -#include "snippets/op/brgemm.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" - +#include "cpu_shape.h" #include "openvino/core/rt_info.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/pass/pattern/matcher.hpp" - -#include "cpu_shape.h" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "snippets/itt.hpp" +#include "snippets/op/brgemm.hpp" +#include "snippets/utils/utils.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" #include "utils/general_utils.h" - namespace ov { namespace intel_cpu { namespace tpp { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.hpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp rename to 
src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.hpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.cpp similarity index 99% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.cpp index dcd97fdd74b638..2331e32e5f2f1c 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.cpp @@ -2,13 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "snippets/itt.hpp" -#include "snippets/op/buffer.hpp" -#include "transformations/tpp/x64/op/modifiers.hpp" #include "set_tpp_leading_dim.hpp" -#include "snippets/op/brgemm.hpp" + +#include "snippets/itt.hpp" #include "snippets/lowered/loop_manager.hpp" +#include "snippets/op/brgemm.hpp" +#include "snippets/op/buffer.hpp" #include "snippets/utils/utils.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.hpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.hpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp index a61668c2a04328..ed34c97f7abd3b 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp @@ 
-4,7 +4,6 @@ #pragma once -#include "modifiers.hpp" #include "openvino/op/add.hpp" #include "openvino/op/subtract.hpp" #include "openvino/op/multiply.hpp" @@ -13,6 +12,7 @@ #include "openvino/op/relu.hpp" #include "snippets/op/powerstatic.hpp" #include "snippets/utils/utils.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" #include "descriptor.hpp" diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp index 4ba53393336ad4..4baaade36a609b 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp @@ -4,8 +4,8 @@ #pragma once -#include "modifiers.hpp" #include "openvino/op/op.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" #include "descriptor.hpp" namespace ov { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp index 9542c4ec90b0b6..ea63ed21b61e56 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp @@ -4,10 +4,9 @@ #pragma once -#include "modifiers.hpp" #include "eltwise.hpp" #include "snippets/op/reduce.hpp" - +#include "transformations/tpp/common/op/modifiers.hpp" #include "libxsmm_typedefs.h" diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp index 566a2a5afde658..fa972cd260bb27 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp @@ -3,7 +3,6 @@ // #include "scalar.hpp" -#include "modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp index 
f9578c20fb13f5..79dd74a1fbb493 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp @@ -4,9 +4,9 @@ #pragma once -#include "modifiers.hpp" #include "eltwise.hpp" #include "snippets/op/reduce.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp index fa545c26dbb53e..d8b4a0e0275ac0 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp @@ -9,13 +9,13 @@ #include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/utils/utils.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" namespace ov { namespace intel_cpu { namespace tpp { namespace pass { +namespace x64 { using namespace ov::snippets::utils; bool BrgemmTPPBlocking::SetBrgemmBeta::run(ov::snippets::lowered::LinearIR& linear_ir, @@ -49,6 +49,8 @@ ov::snippets::lowered::SpecificIterationHandlers BrgemmTPPBlocking::get_k_loop_h handlers.register_pass(); return handlers; } + +} // namespace x64 } // namespace pass } // namespace tpp } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp index 908d12087175aa..af9d57b3eaa6ad 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp @@ -5,12 +5,13 @@ #pragma once #include "snippets/lowered/pass/brgemm_blocking.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include 
"transformations/tpp/common/op/brgemm.hpp" namespace ov { namespace intel_cpu { namespace tpp { namespace pass { +namespace x64 { /** * @interface BrgemmTPPBlocking * @brief Covers BrgemmTPP with blocking loops @@ -44,6 +45,7 @@ class BrgemmTPPBlocking : public ov::snippets::lowered::pass::BrgemmBlocking& n) -> bool { + auto is_supported_op = [](const std::shared_ptr& n) -> bool { #if defined(OPENVINO_ARCH_ARM64) return (ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || @@ -1110,8 +1110,7 @@ void Transformations::MainSnippets(void) { ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - (ov::is_type(n) && ignoreCallback)); + ov::is_type(n) || ov::is_type(n)); #else // CPU Plugin support Swish in Subgraph via conversion to SwichCPU which assumes second input to be constant, // and CPU Plugin does not support Mish for x64 diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp index fc6783f3b3ca45..48ec794a081a22 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp @@ -13,7 +13,7 @@ #include "snippets/snippets_isa.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" #include "cpu/x64/cpu_isa_traits.hpp" namespace ov { @@ -33,7 +33,7 @@ SpecificIterationHandlers get_k_loop_handlers(size_t work_amount, size_t block_s switch (backend) { #ifdef SNIPPETS_LIBXSMM_TPP case BACKEND_TYPE::TPP: - handlers.register_pass(); + handlers.register_pass(); break; #endif case BACKEND_TYPE::CPU: @@ -333,7 +333,7 @@ class 
BrgemmTPPBlockingTest : public BrgemmBlockingTest { BrgemmTPPBlockingTest() : BrgemmBlockingTest() {} void SetUp() override { - pipeline.register_pass(); + pipeline.register_pass(); } }; diff --git a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt index 106945914b21da..038e012902d647 100644 --- a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt @@ -154,11 +154,12 @@ function(ov_add_onednn) endif() endfunction() -if(AARCH64 OR ARM) +if(AARCH64 AND (NOT ANDROID)) set(ENABLE_SNIPPETS_LIBXSMM_TPP ON) endif() if (ENABLE_SNIPPETS_LIBXSMM_TPP) + ov_add_compiler_flags(-Wno-missing-declarations) add_subdirectory(libxsmm) ov_install_static_lib(libxsmm ${OV_CPACK_COMP_CORE}) endif()