From f3367699673b23ff6c44b7c6c7511cc676d30f17 Mon Sep 17 00:00:00 2001 From: chenhu-wang Date: Tue, 14 Jan 2025 04:03:58 +0800 Subject: [PATCH] refactor tpp on x64 and aarch64 --- src/plugins/intel_cpu/CMakeLists.txt | 8 ++- .../snippets/aarch64/cpu_generator.cpp | 9 ++- .../snippets/aarch64/cpu_generator.hpp | 2 +- .../src/emitters/snippets/brgemm_base.cpp | 2 +- .../src/emitters/snippets/brgemm_base.hpp | 3 +- .../emitters/snippets/x64/cpu_generator.cpp | 4 +- .../aarch64/jit_brgemm_emitter.cpp | 7 ++- .../aarch64/jit_brgemm_emitter.hpp | 7 ++- .../aarch64/kernel_executors/brgemm.cpp | 7 ++- .../aarch64/kernel_executors/brgemm.hpp | 0 .../src/emitters/tpp/common/utils.hpp | 36 +++++++++++ .../emitters/tpp/x64/jit_brgemm_emitter.cpp | 7 ++- .../src/emitters/tpp/x64/jit_tpp_emitter.cpp | 21 +------ .../src/emitters/tpp/x64/jit_tpp_emitter.hpp | 1 - src/plugins/intel_cpu/src/emitters/utils.hpp | 19 ------ src/plugins/intel_cpu/src/nodes/subgraph.cpp | 43 ++++++------- .../snippets/aarch64/shape_inference.cpp | 2 +- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 2 +- .../snippets/x64/shape_inference.cpp | 2 +- .../pass/lowered/brgemm_tpp_blocking.cpp | 63 +++++++++++++++++++ .../pass/lowered/brgemm_tpp_blocking.hpp | 55 ++++++++++++++++ .../tpp/{x64 => common}/op/brgemm.cpp | 0 .../tpp/{x64 => common}/op/brgemm.hpp | 2 +- .../tpp/{x64 => common}/op/modifiers.hpp | 0 .../pass/brgemm_to_brgemm_tpp.cpp | 2 +- .../pass/brgemm_to_brgemm_tpp.hpp | 0 .../pass/lowered/set_tpp_leading_dim.cpp | 2 +- .../pass/lowered/set_tpp_leading_dim.hpp | 0 .../transformations/tpp/x64/op/eltwise.hpp | 2 +- .../transformations/tpp/x64/op/equation.hpp | 2 +- .../src/transformations/tpp/x64/op/reduce.hpp | 2 +- .../src/transformations/tpp/x64/op/scalar.cpp | 2 - .../src/transformations/tpp/x64/op/scalar.hpp | 2 +- .../x64/pass/lowered/brgemm_tpp_blocking.cpp | 4 +- .../x64/pass/lowered/brgemm_tpp_blocking.hpp | 4 +- .../tpp/x64/pass/scalar_to_scalar_tpp.cpp | 2 +- .../transformation_pipeline.cpp | 7 
+-- .../x64/lowered/brgemm_blocking.cpp | 6 +- .../intel_cpu/thirdparty/CMakeLists.txt | 3 +- 39 files changed, 241 insertions(+), 101 deletions(-) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/jit_brgemm_emitter.cpp (91%) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/jit_brgemm_emitter.hpp (80%) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/kernel_executors/brgemm.cpp (97%) rename src/plugins/intel_cpu/src/emitters/{snippets => tpp}/aarch64/kernel_executors/brgemm.hpp (100%) create mode 100644 src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp create mode 100644 src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp create mode 100644 src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/op/brgemm.cpp (100%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/op/brgemm.hpp (96%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/op/modifiers.hpp (100%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/brgemm_to_brgemm_tpp.cpp (99%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/brgemm_to_brgemm_tpp.hpp (100%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/lowered/set_tpp_leading_dim.cpp (99%) rename src/plugins/intel_cpu/src/transformations/tpp/{x64 => common}/pass/lowered/set_tpp_leading_dim.hpp (100%) diff --git a/src/plugins/intel_cpu/CMakeLists.txt b/src/plugins/intel_cpu/CMakeLists.txt index 190df5d9c9e371..9ba84937b261df 100644 --- a/src/plugins/intel_cpu/CMakeLists.txt +++ b/src/plugins/intel_cpu/CMakeLists.txt @@ -160,7 +160,7 @@ if(ENABLE_CPU_DEBUG_CAPS) add_definitions(-DCPU_DEBUG_CAPS) endif() -if(AARCH64 OR ARM) +if(AARCH64 AND (NOT ANDROID)) set(ENABLE_SNIPPETS_LIBXSMM_TPP ON) endif() @@ -202,7 +202,9 @@ if(NOT X86_64) 
${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/kernels/x64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/plugin/x64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/snippets/x64/* - ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/x64/*) + ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/tpp/x64/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/x64/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/tpp/x64/*) endif() if (AARCH64) @@ -212,7 +214,9 @@ endif() if(NOT (AARCH64 OR ARM)) list(APPEND EXCLUDE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/cpu_opset/arm/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/transformations/tpp/aarch64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/plugin/aarch64/* + ${CMAKE_CURRENT_SOURCE_DIR}/src/emitters/tpp/aarch64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/executors/aarch64/* ${CMAKE_CURRENT_SOURCE_DIR}/src/nodes/kernels/aarch64/*) endif() diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp index 410f4a36f5e51f..c6b66a8cd7f215 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp @@ -6,7 +6,6 @@ #include "emitters/plugin/aarch64/jit_conversion_emitters.hpp" #include "emitters/plugin/aarch64/jit_eltwise_emitters.hpp" -#include "emitters/snippets/aarch64/jit_brgemm_emitter.hpp" #include "emitters/snippets/aarch64/jit_fill_emitter.hpp" #include "emitters/snippets/aarch64/jit_kernel_emitter.hpp" #include "emitters/snippets/aarch64/jit_loop_emitters.hpp" @@ -25,7 +24,11 @@ #include "snippets/snippets_isa.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" + +#ifdef SNIPPETS_LIBXSMM_TPP +# include "emitters/tpp/aarch64/jit_brgemm_emitter.hpp" +# include "transformations/tpp/common/op/brgemm.hpp" +#endif namespace ov { 
@@ -205,9 +208,11 @@ CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(jit_swish_emitter); jitters[ov::op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(jit_tanh_emitter); +#ifdef SNIPPETS_LIBXSMM_TPP // brgemm jitters[ov::intel_cpu::tpp::op::BrgemmTPP::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_brgemm_emitter, configurator->get_kernel_executor_table(), compiled_kernel_cache); +#endif // control flow jitters[snippets::op::KernelStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_static_emitter); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp index 0b539df2cf365c..34881742f1812e 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024-2025 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // diff --git a/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp index 2c2c7d50d3ef6b..9bc99888463f24 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp @@ -262,6 +262,7 @@ void BrgemmBaseKernelExecutor::update_config(const ov::snippets::lowered::Expres // In case of data repacking LDB is chosen in accordance with repacking buffer size if (with_repacking(brgemm_node->get_type())) LDB = DIM_CAST(brgemm_utils::repacking::compute_LDB(LDB, brgemm_node->get_input_element_type(1))); + config.update(DIM_CAST(M), DIM_CAST(N), DIM_CAST(K), LDA, LDB, LDC, beta); } @@ -327,7 +328,6 @@ void BrgemmBaseKernelExecutor::execute_brgemm_kernel( brgemm_p.do_post_ops = with_comp; brgemm_p.do_apply_comp = 
with_comp; brgemm_p.skip_accm = 0; - brgemm_p.BS = 1; // default value OV_CPU_JIT_EMITTER_ASSERT(kernel, "has nullptr Brgemm kernel"); (*kernel)(&brgemm_p); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.hpp b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.hpp index 674ea42522230b..a0a55e58df75b7 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.hpp @@ -7,12 +7,13 @@ #include #include "cpu/x64/cpu_isa_traits.hpp" -#include "emitters/plugin/x64/jit_emitter.hpp" #include "emitters/snippets/cpu_kernel_executor_table.hpp" #include "emitters/snippets/jit_snippets_call_args.hpp" +#include "emitters/utils.hpp" #include "openvino/core/type/element_type.hpp" #include "snippets/lowered/loop_info.hpp" #include "snippets/lowered/loop_manager.hpp" +#include "utils/general_utils.h" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp index 5e4a8992aa7165..b04f93a73aca77 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp @@ -44,10 +44,10 @@ # include "emitters/tpp/x64/jit_eltwise_emitters.hpp" # include "emitters/tpp/x64/jit_equation_emitter.hpp" # include "emitters/tpp/x64/jit_scalar_emitter.hpp" -# include "transformations/tpp/x64/op/brgemm.hpp" +# include "transformations/tpp/common/op/brgemm.hpp" +# include "transformations/tpp/common/op/modifiers.hpp" # include "transformations/tpp/x64/op/eltwise.hpp" # include "transformations/tpp/x64/op/equation.hpp" -# include "transformations/tpp/x64/op/modifiers.hpp" # include "transformations/tpp/x64/op/reduce.hpp" # include "transformations/tpp/x64/op/scalar.hpp" // Note: for reference implementations diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp 
b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.cpp similarity index 91% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp rename to src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.cpp index 5d705a755ca492..ac57ebbad42ab7 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.cpp @@ -5,7 +5,7 @@ #include "jit_brgemm_emitter.hpp" #include "snippets/utils/utils.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" using namespace Xbyak_aarch64; @@ -42,7 +42,10 @@ void jit_brgemm_emitter::validate_arguments(const std::vector& in, const OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Expects 1 output reg, got" + std::to_string(out.size())); } -void jit_brgemm_emitter::emit_code(const std::vector& in, const std::vector& out) const { +void jit_brgemm_emitter::emit_code(const std::vector& in, + const std::vector& out, + const std::vector& pool_vec_idxs, + const std::vector& pool_gpr_idxs) const { validate_arguments(in, out); emit_impl(in, out); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.hpp similarity index 80% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp rename to src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.hpp index 83e46631ac8030..d98e97800e4b6e 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/jit_brgemm_emitter.hpp @@ -5,7 +5,7 @@ #pragma once #include "emitters/plugin/aarch64/jit_emitter.hpp" -#include "emitters/snippets/aarch64/kernel_executors/brgemm.hpp" +#include "emitters/tpp/aarch64/kernel_executors/brgemm.hpp" namespace ov { namespace intel_cpu { @@ -26,7 +26,10 @@ 
class jit_brgemm_emitter : public jit_emitter { static std::set> get_supported_precisions( const std::shared_ptr& node = nullptr); - void emit_code(const std::vector& in, const std::vector& out) const; + void emit_code(const std::vector& in_idxs, + const std::vector& out_idxs, + const std::vector& pool_vec_idxs = {}, + const std::vector& pool_gpr_idxs = {}) const override; private: void validate_arguments(const std::vector& in, const std::vector& out) const override; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.cpp similarity index 97% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp rename to src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.cpp index 05d8fc4c5939ff..ae3a77022f3a31 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.cpp @@ -4,7 +4,8 @@ #include "brgemm.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "emitters/tpp/common/utils.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" using namespace Xbyak; using namespace dnnl::impl; @@ -34,8 +35,8 @@ BrgemmKernelConfig::StaticParams::StaticParams(const element::Type& in0_dtype, const element::Type& in1_dtype, dnnl::impl::cpu::aarch64::cpu_isa_t primitive_isa) : StaticBaseParams(in0_dtype, in1_dtype, dnnl::impl::cpu::x64::cpu_isa_t::isa_undef, compute_hash(primitive_isa)) { - m_type_in0 = ov_to_xsmm_dtype(in0_dtype); - m_type_in1 = ov_to_xsmm_dtype(in1_dtype); + m_type_in0 = tpp::ov_to_xsmm_dtype(in0_dtype); + m_type_in1 = tpp::ov_to_xsmm_dtype(in1_dtype); m_type_exec = LIBXSMM_DATATYPE_F32; m_type_out0 = LIBXSMM_DATATYPE_F32; m_compile_flags = LIBXSMM_GEMM_FLAGS('N', 'N'); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp 
b/src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.hpp similarity index 100% rename from src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp rename to src/plugins/intel_cpu/src/emitters/tpp/aarch64/kernel_executors/brgemm.hpp diff --git a/src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp b/src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp new file mode 100644 index 00000000000000..f4778a0b7553f3 --- /dev/null +++ b/src/plugins/intel_cpu/src/emitters/tpp/common/utils.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "emitters/utils.hpp" +#include "libxsmm.h" + +namespace ov { +namespace intel_cpu { +namespace tpp { + +inline libxsmm_datatype ov_to_xsmm_dtype(ov::element::Type_t elemet_type) { + switch (elemet_type) { + case ov::element::Type_t::f32: + return LIBXSMM_DATATYPE_F32; + case ov::element::Type_t::bf16: + return LIBXSMM_DATATYPE_BF16; + case ov::element::Type_t::f16: + return LIBXSMM_DATATYPE_F16; + case ov::element::Type_t::i8: + return LIBXSMM_DATATYPE_I8; + case ov::element::Type_t::u8: + return LIBXSMM_DATATYPE_U8; + default: + OV_CPU_JIT_EMITTER_THROW("Attempt to convert unsupported ov data type"); + return LIBXSMM_DATATYPE_IMPLICIT; + } +} + +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp index e873d7f7aa98eb..3e99b4c1db5449 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_brgemm_emitter.cpp @@ -5,7 +5,8 @@ #include "jit_brgemm_emitter.hpp" #include "emitters/snippets/x64/jit_snippets_emitters.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "emitters/tpp/common/utils.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" using 
jit_generator = dnnl::impl::cpu::x64::jit_generator; using cpu_isa_t = dnnl::impl::cpu::x64::cpu_isa_t; @@ -33,8 +34,8 @@ BrgemmTppEmitter::BrgemmTppEmitter(jit_generator* h, cpu_isa_t isa, const Expres brgemm_node->get_input_stride(1), brgemm_node->get_output_stride(0)}; - auto in_0_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(0)); - auto in_1_prec = ov_to_xsmm_dtype(brgemm_node->get_input_element_type(1)); + auto in_0_prec = tpp::ov_to_xsmm_dtype(brgemm_node->get_input_element_type(0)); + auto in_1_prec = tpp::ov_to_xsmm_dtype(brgemm_node->get_input_element_type(1)); exec_dtype = in_0_prec == LIBXSMM_DATATYPE_I8 || in_0_prec == LIBXSMM_DATATYPE_U8 ? LIBXSMM_DATATYPE_I32 : LIBXSMM_DATATYPE_F32; auto out_0_prec = exec_dtype == LIBXSMM_DATATYPE_I32 ? LIBXSMM_DATATYPE_I32 : LIBXSMM_DATATYPE_F32; diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp index a18b1616bb517c..61c96820dca052 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp @@ -5,6 +5,7 @@ #include "jit_tpp_emitter.hpp" #include "emitters/plugin/x64/utils.hpp" +#include "emitters/tpp/common/utils.hpp" #include "snippets/lowered/port_descriptor.hpp" #include "transformations/tpp/x64/op/eltwise.hpp" @@ -56,7 +57,7 @@ TppEmitter::TppEmitter(dnnl::impl::cpu::x64::jit_generator* h, }; for (size_t i = 0; i < num_ins; i++) { - io_dtypes[i] = ov_to_xsmm_dtype(node->get_input_element_type(i)); + io_dtypes[i] = tpp::ov_to_xsmm_dtype(node->get_input_element_type(i)); io_offsets[i] = tpp_mod->get_input_offset(i); io_strides[i] = replace_full_dim(tpp_mod->get_input_stride(i), expr->get_input_port_descriptor(i)->get_shape().back()); @@ -65,7 +66,7 @@ TppEmitter::TppEmitter(dnnl::impl::cpu::x64::jit_generator* h, for (size_t i = 0; i < num_outs; i++) { const auto i_off = i + num_ins; - io_dtypes[i_off] = 
ov_to_xsmm_dtype(node->get_output_element_type(i)); + io_dtypes[i_off] = tpp::ov_to_xsmm_dtype(node->get_output_element_type(i)); io_offsets[i_off] = tpp_mod->get_output_offset(i); io_strides[i_off] = replace_full_dim(tpp_mod->get_output_stride(i), expr->get_output_port_descriptor(i)->get_shape().back()); @@ -121,21 +122,5 @@ void TppEmitter::emit_impl(const std::vector& in, const std::vector& in, const std::vector& out) const; - static libxsmm_datatype ov_to_xsmm_dtype(ov::element::Type_t elemet_type); protected: void emit_impl(const std::vector& in, const std::vector& out) const override; diff --git a/src/plugins/intel_cpu/src/emitters/utils.hpp b/src/plugins/intel_cpu/src/emitters/utils.hpp index 7d4d5318062427..92df781a1fc318 100644 --- a/src/plugins/intel_cpu/src/emitters/utils.hpp +++ b/src/plugins/intel_cpu/src/emitters/utils.hpp @@ -6,7 +6,6 @@ #include -#include "libxsmm.h" #include "openvino/core/except.hpp" #include "openvino/core/type/element_type.hpp" @@ -24,23 +23,5 @@ std::string jit_emitter_pretty_name(const std::string& pretty_func); #define OV_CPU_JIT_EMITTER_THROW(...) OPENVINO_THROW(OV_CPU_JIT_EMITTER_NAME, ": ", __VA_ARGS__) #define OV_CPU_JIT_EMITTER_ASSERT(cond, ...) 
OPENVINO_ASSERT((cond), OV_CPU_JIT_EMITTER_NAME, ": ", __VA_ARGS__) -inline libxsmm_datatype ov_to_xsmm_dtype(ov::element::Type_t elemet_type) { - switch (elemet_type) { - case ov::element::Type_t::f32: - return LIBXSMM_DATATYPE_F32; - case ov::element::Type_t::bf16: - return LIBXSMM_DATATYPE_BF16; - case ov::element::Type_t::f16: - return LIBXSMM_DATATYPE_F16; - case ov::element::Type_t::i8: - return LIBXSMM_DATATYPE_I8; - case ov::element::Type_t::u8: - return LIBXSMM_DATATYPE_U8; - default: - OV_CPU_JIT_EMITTER_THROW("Attempt to convert unsupported ov data type"); - return LIBXSMM_DATATYPE_IMPLICIT; - } -} - } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 3c5dba585f31ad..e4a080e2e94fa4 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -52,12 +52,16 @@ #ifdef SNIPPETS_LIBXSMM_TPP # include "snippets/lowered/pass/optimize_domain.hpp" -# include "transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp" -# include "transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp" -# include "transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp" -# include "transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp" -# include "transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp" -# include "transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp" +# include "transformations/tpp/common/pass/brgemm_to_brgemm_tpp.hpp" +# include "transformations/tpp/common/pass/lowered/set_tpp_leading_dim.hpp" +# if defined(OPENVINO_ARCH_ARM64) +# include "transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp" +# else +# include "transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.hpp" +# include "transformations/tpp/x64/pass/fuse_tpp_to_equations.hpp" +# include "transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp" +# include "transformations/tpp/x64/pass/scalar_to_scalar_tpp.hpp" +# endif #endif 
namespace ov { @@ -497,9 +501,6 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { ov::intel_cpu::pass::EliminateBrgemmCopyB); SNIPPETS_REGISTER_PASS_ABSOLUTE_X86_64(Place::PipelineEnd, ov::intel_cpu::pass::RemoveConverts); SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON(Place::PipelineEnd, ov::intel_cpu::pass::MulAddToFMA); - SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::Before, - ov::snippets::pass::PropagatePrecision, - ov::intel_cpu::tpp::pass::BrgemmToBrgemmTPP); #ifdef SNIPPETS_LIBXSMM_TPP SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::Before, @@ -513,6 +514,9 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::After, ov::intel_cpu::tpp::pass::EltwiseToEltwiseTPP, ov::intel_cpu::tpp::pass::FuseTPPToEquations); + SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::Before, + ov::snippets::pass::PropagatePrecision, + ov::intel_cpu::tpp::pass::BrgemmToBrgemmTPP); #endif #undef SNIPPETS_REGISTER_PASS_ABSOLUTE_COMMON @@ -527,11 +531,10 @@ Subgraph::DataFlowPasses Subgraph::getDataFlowPasses() { Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const { ControlFlowPasses backend_passes; +#if defined(OPENVINO_ARCH_X86_64) || (defined(OPENVINO_ARCH_ARM64) && defined(SNIPPETS_LIBXSMM_TPP)) using PassPosition = ov::snippets::pass::PassPosition; using Place = PassPosition::Place; -#define SNIPPETS_REGISTER_PASS_RELATIVE_COMMON(PASS_PLACE, TARGET_PASS, PASS, ...) \ - backend_passes.emplace_back(PassPosition(PASS_PLACE, TARGET_PASS::get_type_info_static()), \ - std::make_shared(__VA_ARGS__)) +#endif #if defined(OPENVINO_ARCH_X86_64) # define SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(PASS_PLACE, TARGET_PASS, PASS, ...) 
\ @@ -564,23 +567,21 @@ Subgraph::ControlFlowPasses Subgraph::getControlFlowPasses() const { ov::snippets::lowered::pass::InsertBuffers, ov::intel_cpu::pass::InsertBrgemmCopyBuffers); - SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, - ov::snippets::lowered::pass::MarkLoops, - ov::intel_cpu::tpp::pass::BrgemmTPPBlocking); - SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, - ov::snippets::lowered::pass::InsertLoops, - ov::intel_cpu::tpp::pass::SetTPPLeadingDim); - #ifdef SNIPPETS_LIBXSMM_TPP SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::Before, ov::intel_cpu::pass::BrgemmCPUBlocking, - ov::intel_cpu::tpp::pass::BrgemmTPPBlocking); + ov::intel_cpu::tpp::pass::x64::BrgemmTPPBlocking); SNIPPETS_REGISTER_PASS_RELATIVE_X86_64(Place::After, ov::intel_cpu::pass::FuseLoadStoreConvert, ov::intel_cpu::tpp::pass::SetTPPLeadingDim); + SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, + ov::snippets::lowered::pass::MarkLoops, + ov::intel_cpu::tpp::pass::aarch64::BrgemmTPPBlocking); + SNIPPETS_REGISTER_PASS_RELATIVE_ARM64(Place::After, + ov::snippets::lowered::pass::InsertLoops, + ov::intel_cpu::tpp::pass::SetTPPLeadingDim); #endif -#undef SNIPPETS_REGISTER_PASS_RELATIVE_COMMON #undef SNIPPETS_REGISTER_PASS_RELATIVE_X86_64 #undef SNIPPETS_REGISTER_PASS_RELATIVE_ARM64 return backend_passes; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp index ab07227c328906..041dedb06f8896 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/shape_inference.cpp @@ -7,7 +7,7 @@ #include "snippets/shape_inference/shape_infer_instances.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" 
namespace ov { namespace snippets { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index e0a87ca288bac1..f6bf4d29818a88 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -16,7 +16,7 @@ #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" #include "transformations/snippets/x64/op/brgemm_utils.hpp" -#include "transformations/tpp/x64/op/modifiers.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" #include "utils/general_utils.h" namespace ov { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp index 50a2399e93ecc4..14d72c3d3d0969 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/shape_inference.cpp @@ -14,7 +14,7 @@ #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" #ifdef SNIPPETS_LIBXSMM_TPP -# include "transformations/tpp/x64/op/brgemm.hpp" +# include "transformations/tpp/common/op/brgemm.hpp" # include "transformations/tpp/x64/op/equation.hpp" # include "transformations/tpp/x64/op/reduce.hpp" # include "transformations/tpp/x64/op/scalar.hpp" diff --git a/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp new file mode 100644 index 00000000000000..70fa593f0e4a92 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.cpp @@ -0,0 +1,63 @@ +// Copyright 
(C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "brgemm_tpp_blocking.hpp" + +#include "snippets/itt.hpp" +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_manager.hpp" +#include "snippets/snippets_isa.hpp" +#include "snippets/utils/utils.hpp" + +namespace ov { +namespace intel_cpu { +namespace tpp { +namespace pass { +namespace aarch64 { +using namespace ov::snippets::utils; + +bool BrgemmTPPBlocking::SetBrgemmBeta::run(ov::snippets::lowered::LinearIR& linear_ir, + ov::snippets::lowered::LinearIR::constExprIt begin, + ov::snippets::lowered::LinearIR::constExprIt end) { + for (auto expr_it = begin; expr_it != end; ++expr_it) { + if (const auto brgemm = ov::as_type_ptr(expr_it->get()->get_node())) + brgemm->set_beta(0); + } + return true; +} + +std::shared_ptr BrgemmTPPBlocking::SetBrgemmBeta::merge( + const std::shared_ptr& other) { + return !other || ov::is_type(other) ? std::make_shared() : nullptr; +} + +std::tuple BrgemmTPPBlocking::get_blocking_params( + const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const { + size_t m, n, k; + std::tie(m, n, k) = get_brgemm_dimensions(brgemm_expr); + OPENVINO_ASSERT(!is_dynamic_value(m) && !is_dynamic_value(n) && !is_dynamic_value(k), + "BrgemmTPP doesn't support dynamic shapes"); + + size_t m_blk, n_blk, k_blk; + std::tie(m_blk, n_blk, k_blk) = BrgemmBlockingBase::get_blocking_params(brgemm_expr); + + auto get_projected_blk = [](const size_t dim, const size_t blk) { + return ov::snippets::utils::is_full_dim_value(blk) ? 
dim : blk; + }; + return std::make_tuple(get_projected_blk(m, m_blk), get_projected_blk(n, n_blk), get_projected_blk(k, k_blk)); +} + +ov::snippets::lowered::SpecificIterationHandlers BrgemmTPPBlocking::get_k_loop_handlers(size_t work_amount, + size_t block_size) const { + ov::snippets::lowered::SpecificIterationHandlers handlers = + ov::snippets::lowered::pass::BrgemmBlockingBase::get_k_loop_handlers(work_amount, block_size); + handlers.register_pass(); + return handlers; +} + +} // namespace aarch64 +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp new file mode 100644 index 00000000000000..5df6f198e34533 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/tpp/aarch64/pass/lowered/brgemm_tpp_blocking.hpp @@ -0,0 +1,55 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "snippets/lowered/pass/brgemm_blocking.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" + +namespace ov { +namespace intel_cpu { +namespace tpp { +namespace pass { +namespace aarch64 { +/** + * @interface BrgemmTPPBlocking + * @brief Covers BrgemmTPP with blocking loops + * @ingroup snippets + */ + +class BrgemmTPPBlocking : public ov::snippets::lowered::pass::BrgemmBlocking { +public: + OPENVINO_RTTI("BrgemmTPPBlocking", + "tpp::op::BrgemmTPP", + snippets::lowered::pass::BrgemmBlocking); + + /** + * @interface SetBrgemmBeta + * @brief The pass sets `beta = 0` on BrgemmTPP. 
+ * Note: the pass is in public section to have opportunity to validate blocking loop in tests + * @ingroup snippets + */ + class SetBrgemmBeta : public snippets::lowered::pass::RangedPass { + public: + OPENVINO_RTTI("SetBrgemmBeta", "0", snippets::lowered::pass::RangedPass); + SetBrgemmBeta() = default; + bool run(ov::snippets::lowered::LinearIR& linear_ir, + ov::snippets::lowered::LinearIR::constExprIt begin, + ov::snippets::lowered::LinearIR::constExprIt end) override; + std::shared_ptr merge( + const std::shared_ptr& other) override; + }; + +private: + std::tuple get_blocking_params( + const ov::snippets::lowered::ExpressionPtr& brgemm_expr) const override; + ov::snippets::lowered::SpecificIterationHandlers get_k_loop_handlers(size_t work_amount, + size_t block_size) const override; +}; + +} // namespace aarch64 +} // namespace pass +} // namespace tpp +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp b/src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.cpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.cpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.cpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.hpp similarity index 96% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.hpp index cda7f58afebea8..9c450ec93b96ba 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/brgemm.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/common/op/brgemm.hpp @@ -5,7 +5,7 @@ #pragma once #include "modifiers.hpp" -#include "transformations/snippets/x64/op/brgemm_cpu.hpp" +#include "snippets/op/brgemm.hpp" namespace ov { namespace intel_cpu { diff --git 
a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/op/modifiers.hpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/op/modifiers.hpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/op/modifiers.hpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.cpp similarity index 99% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.cpp index c042373f054fa2..03cc43dbe82ab1 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.cpp @@ -11,7 +11,7 @@ #include "snippets/itt.hpp" #include "snippets/op/brgemm.hpp" #include "snippets/utils/utils.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" #include "utils/general_utils.h" namespace ov { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.hpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/brgemm_to_brgemm_tpp.hpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/pass/brgemm_to_brgemm_tpp.hpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.cpp similarity index 99% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.cpp index 
c1b981275face0..7720e0b142d45f 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.cpp @@ -9,7 +9,7 @@ #include "snippets/op/brgemm.hpp" #include "snippets/op/buffer.hpp" #include "snippets/utils/utils.hpp" -#include "transformations/tpp/x64/op/modifiers.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp b/src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.hpp similarity index 100% rename from src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.hpp rename to src/plugins/intel_cpu/src/transformations/tpp/common/pass/lowered/set_tpp_leading_dim.hpp diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp index 7338450ff8257d..0e0a38c0c6161c 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/eltwise.hpp @@ -5,7 +5,6 @@ #pragma once #include "descriptor.hpp" -#include "modifiers.hpp" #include "openvino/op/add.hpp" #include "openvino/op/divide.hpp" #include "openvino/op/exp.hpp" @@ -14,6 +13,7 @@ #include "openvino/op/subtract.hpp" #include "snippets/op/powerstatic.hpp" #include "snippets/utils/utils.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp index bf16f149b415de..0df8ad2eb776f0 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/equation.hpp @@ -5,8 +5,8 @@ #pragma once 
#include "descriptor.hpp" -#include "modifiers.hpp" #include "openvino/op/op.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp index 07ed321abc7ff5..a8cf39cef7bda5 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/reduce.hpp @@ -6,8 +6,8 @@ #include "eltwise.hpp" #include "libxsmm_typedefs.h" -#include "modifiers.hpp" #include "snippets/op/reduce.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp index 5855481efd1d60..f43d65d15180d3 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.cpp @@ -4,8 +4,6 @@ #include "scalar.hpp" -#include "modifiers.hpp" - namespace ov { namespace intel_cpu { namespace tpp { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp index 9807dbfafa31d0..f836e01d32554a 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/op/scalar.hpp @@ -5,8 +5,8 @@ #pragma once #include "eltwise.hpp" -#include "modifiers.hpp" #include "snippets/op/reduce.hpp" +#include "transformations/tpp/common/op/modifiers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp index d9485b1c6b7b9d..ef1911a3d16f63 100644 --- 
a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.cpp @@ -9,12 +9,12 @@ #include "snippets/lowered/loop_manager.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/utils/utils.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" namespace ov { namespace intel_cpu { namespace tpp { namespace pass { +namespace x64 { using namespace ov::snippets::utils; bool BrgemmTPPBlocking::SetBrgemmBeta::run(ov::snippets::lowered::LinearIR& linear_ir, @@ -55,6 +55,8 @@ ov::snippets::lowered::SpecificIterationHandlers BrgemmTPPBlocking::get_k_loop_h handlers.register_pass(); return handlers; } + +} // namespace x64 } // namespace pass } // namespace tpp } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp index 31f4bfeadc8979..42fda2cb8528ce 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/brgemm_tpp_blocking.hpp @@ -5,12 +5,13 @@ #pragma once #include "snippets/lowered/pass/brgemm_blocking.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" namespace ov { namespace intel_cpu { namespace tpp { namespace pass { +namespace x64 { /** * @interface BrgemmTPPBlocking * @brief Covers BrgemmTPP with blocking loops @@ -47,6 +48,7 @@ class BrgemmTPPBlocking : public ov::snippets::lowered::pass::BrgemmBlocking& n) -> bool { + auto is_supported_op = [](const std::shared_ptr& n) -> bool { #if defined(OPENVINO_ARCH_ARM64) return (ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || @@ -1110,8 +1110,7 @@ void Transformations::MainSnippets(void) { ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || 
ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || - ov::is_type(n) || ov::is_type(n) || - (ov::is_type(n) && ignoreCallback)); + ov::is_type(n) || ov::is_type(n)); #else // CPU Plugin support Swish in Subgraph via conversion to SwichCPU which assumes second input to be constant, // and CPU Plugin does not support Mish for x64 diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp index fc6783f3b3ca45..48ec794a081a22 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp @@ -13,7 +13,7 @@ #include "snippets/snippets_isa.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" -#include "transformations/tpp/x64/op/brgemm.hpp" +#include "transformations/tpp/common/op/brgemm.hpp" #include "cpu/x64/cpu_isa_traits.hpp" namespace ov { @@ -33,7 +33,7 @@ SpecificIterationHandlers get_k_loop_handlers(size_t work_amount, size_t block_s switch (backend) { #ifdef SNIPPETS_LIBXSMM_TPP case BACKEND_TYPE::TPP: - handlers.register_pass(); + handlers.register_pass(); break; #endif case BACKEND_TYPE::CPU: @@ -333,7 +333,7 @@ class BrgemmTPPBlockingTest : public BrgemmBlockingTest { BrgemmTPPBlockingTest() : BrgemmBlockingTest() {} void SetUp() override { - pipeline.register_pass(); + pipeline.register_pass(); } }; diff --git a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt index 106945914b21da..038e012902d647 100644 --- a/src/plugins/intel_cpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_cpu/thirdparty/CMakeLists.txt @@ -154,11 +154,12 @@ function(ov_add_onednn) endif() endfunction() -if(AARCH64 OR ARM) +if(AARCH64 AND (NOT ANDROID)) set(ENABLE_SNIPPETS_LIBXSMM_TPP ON) endif() if 
(ENABLE_SNIPPETS_LIBXSMM_TPP) + ov_add_compiler_flags(-Wno-missing-declarations) add_subdirectory(libxsmm) ov_install_static_lib(libxsmm ${OV_CPACK_COMP_CORE}) endif()