From 520cf299627e2db450385076b6d81d5ae5c8f936 Mon Sep 17 00:00:00 2001 From: chenhuwa Date: Tue, 26 Dec 2023 14:52:21 +0800 Subject: [PATCH] encapsulate segfault detector related --- .../snippets/include/snippets/emitter.hpp | 4 - .../include/snippets/target_machine.hpp | 5 +- src/common/snippets/src/generator.cpp | 5 - src/common/snippets/src/lowered/linear_ir.cpp | 10 +- src/plugins/intel_cpu/src/emitters/utils.cpp | 74 ++++++ src/plugins/intel_cpu/src/emitters/utils.hpp | 7 + .../src/emitters/x64/cpu_generator.cpp | 3 +- .../src/emitters/x64/cpu_generator.hpp | 3 + .../emitters/x64/jit_conversion_emitters.cpp | 14 -- .../emitters/x64/jit_conversion_emitters.hpp | 6 - .../src/emitters/x64/jit_dnnl_emitters.cpp | 11 - .../src/emitters/x64/jit_dnnl_emitters.hpp | 3 - .../src/emitters/x64/jit_eltwise_emitters.cpp | 15 +- .../src/emitters/x64/jit_eltwise_emitters.hpp | 7 +- .../src/emitters/x64/jit_emitter.cpp | 31 +-- .../src/emitters/x64/jit_emitter.hpp | 45 +--- .../x64/jit_segfault_detector_emitter.cpp | 82 ++++++ .../x64/jit_segfault_detector_emitter.hpp | 55 ++++ .../emitters/x64/jit_snippets_emitters.cpp | 236 +++++------------- .../emitters/x64/jit_snippets_emitters.hpp | 70 +----- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 12 +- src/plugins/intel_cpu/src/nodes/subgraph.h | 1 + 22 files changed, 307 insertions(+), 392 deletions(-) create mode 100644 src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.cpp create mode 100644 src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.hpp diff --git a/src/common/snippets/include/snippets/emitter.hpp b/src/common/snippets/include/snippets/emitter.hpp index 496ad627798ba0..a2aa4923c2eef4 100644 --- a/src/common/snippets/include/snippets/emitter.hpp +++ b/src/common/snippets/include/snippets/emitter.hpp @@ -46,10 +46,6 @@ class Emitter { virtual void emit_data() const {} virtual ~Emitter() = default; - -#ifdef SNIPPETS_DEBUG_CAPS - virtual void set_custom_segfault_detector(const bool 
is_enable) {} -#endif }; } // namespace snippets diff --git a/src/common/snippets/include/snippets/target_machine.hpp b/src/common/snippets/include/snippets/target_machine.hpp index e2dc5f970b6ab1..d42779bcd7153c 100644 --- a/src/common/snippets/include/snippets/target_machine.hpp +++ b/src/common/snippets/include/snippets/target_machine.hpp @@ -50,6 +50,7 @@ class TargetMachine { */ virtual size_t get_lanes() const = 0; + /** * @brief called by generator to all the emitter for a target machine * @return a map by node's type info with callbacks to create an instance of emitter for corresponding operation type @@ -64,10 +65,6 @@ class TargetMachine { bool has(const ov::DiscreteTypeInfo& type) const; virtual ~TargetMachine() = default; -#ifdef SNIPPETS_DEBUG_CAPS - bool custom_segfault_detector = false; -#endif - protected: std::map jitters; }; diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index 09674700036057..5c4848c2535358 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -46,11 +46,6 @@ void Generator::generate(lowered::LinearIR& linear_ir, LoweringResult& result, c loops2DKernel->compile_params = compile_params; auto loops2DKernelExpr = linear_ir.create_expression(loops2DKernel, std::vector{}); std::shared_ptr kernel = target->get(op::Kernel::get_type_info_static())(loops2DKernelExpr); -#ifdef SNIPPETS_DEBUG_CAPS - // This is entrance emitter outside of LIR and not derived from MemoryAccess, but access memory, manually enable it. 
- if (target->custom_segfault_detector) - kernel->set_custom_segfault_detector(true); -#endif kernel->emit_code({}, {}); diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index 8e027db9768db6..4456b6a240be4a 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -156,16 +156,8 @@ void LinearIR::debug_print(bool tds_as_pointers) const { void LinearIR::init_emitters(const std::shared_ptr& target) { for (auto& expr : m_expressions) { - if (!expr->get_emitter()) { + if (!expr->get_emitter()) expr->m_emitter = target->get(expr->get_node()->get_type_info())(expr); -#ifdef SNIPPETS_DEBUG_CAPS - if (target->custom_segfault_detector) { - if (is_type(expr->get_node())) { - expr->m_emitter->set_custom_segfault_detector(true); - } - } -#endif - } } } diff --git a/src/plugins/intel_cpu/src/emitters/utils.cpp b/src/plugins/intel_cpu/src/emitters/utils.cpp index 4530c74a97961e..9d8c999e3bb8f8 100644 --- a/src/plugins/intel_cpu/src/emitters/utils.cpp +++ b/src/plugins/intel_cpu/src/emitters/utils.cpp @@ -211,5 +211,79 @@ void RegPrinter::print_reg(jit_generator &h, REG_T reg, const char *name) { postamble(h); } +#ifdef SNIPPETS_DEBUG_CAPS +std::string get_type_name(const jit_emitter* emitter); +std::string get_type_name(const jit_emitter* emitter) { + std::string name = typeid(*emitter).name(); +#ifndef _WIN32 + int status; + std::unique_ptr demangled_name( + abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), + std::free); + name = demangled_name.get(); +#endif + return name; +} + +void print_segfault_detector_result(jit_uni_segfault_detector_emitter* detector_emitter) { + auto print_memory_emitter_info = [&](MemoryEmitter *memory_emitter) { + std::cerr << "detailed emitter info is, src precision:" << memory_emitter->src_prc << ", dst precision:" << memory_emitter->dst_prc + << ", load/store element number:" << memory_emitter->count + << ", byte offset" << 
memory_emitter->byte_offset << std::endl; + // more memory address info tracked in detector_emitter. + std::cerr << "start_address:" << detector_emitter->start_address + << ", current_address:" << detector_emitter->current_address + << ", iteration:" << detector_emitter->iteration << "\n"; + }; + auto print_brgemm_emitter_info = [&](BrgemmEmitter* brgemm_emitter) { + std::cerr << "detailed emitter info is, m_brgCtx.M:" << brgemm_emitter->m_brgCtx.M + << " m_brgCtx.K:" << brgemm_emitter->m_brgCtx.K + << " m_brgCtx.N:" << brgemm_emitter->m_brgCtx.N + << " m_brgCtx.LDA:" << brgemm_emitter->m_brgCtx.LDA + << " m_brgCtx.LDB:" << brgemm_emitter->m_brgCtx.LDB + << " m_brgCtx.LDC:" << brgemm_emitter->m_brgCtx.LDC + << " m_brgCtx.dt_in0:" << brgemm_emitter->m_brgCtx.dt_in0 + << " m_brgCtx.dt_in1:" << brgemm_emitter->m_brgCtx.dt_in1 + << " m_brgCtx.palette:" << brgemm_emitter->m_brgCtx.palette + << " m_brgCtx.is_with_amx:" << brgemm_emitter->m_brgCtx.is_with_amx + << " m_brgCtx.is_with_comp:" << brgemm_emitter->m_brgCtx.is_with_comp + << " m_brgCtx.beta:" << brgemm_emitter->m_brgCtx.beta + << " m_load_offset_a:" << brgemm_emitter->m_load_offset_a + << " m_load_offset_b:" << brgemm_emitter->m_load_offset_b + << " m_load_offset_scratch:" << brgemm_emitter->m_load_offset_scratch + << " m_store_offset_c:" << brgemm_emitter->m_store_offset_c + << " m_with_scratch:" << brgemm_emitter->m_with_scratch + << " m_with_comp:" << brgemm_emitter->m_with_comp << "\n"; + }; + auto print_brgemm_copy_emitter_info = [&](BrgemmCopyBEmitter* brgemm_copy_emitter) { + std::cerr << "detailed emitter info is, m_LDB:" << brgemm_copy_emitter->m_LDB + << " m_K:" << brgemm_copy_emitter->m_K + << " m_K_blk:" << brgemm_copy_emitter->m_K_blk + << " m_K_tail:" << brgemm_copy_emitter->m_K_tail + << " m_N:" << brgemm_copy_emitter->m_N + << " m_N_blk:" << brgemm_copy_emitter->m_N_blk + << " m_N_tail:" << brgemm_copy_emitter->m_N_tail + << " m_brgemm_prc_in0:" << brgemm_copy_emitter->m_brgemm_prc_in0 + << " 
m_brgemm_prc_in1:" << brgemm_copy_emitter->m_brgemm_prc_in1 + << " m_brgemmVNNIFactor:" << brgemm_copy_emitter->m_brgemmVNNIFactor + << " m_with_comp:" << brgemm_copy_emitter->m_with_comp + << " m_in_offset:" << brgemm_copy_emitter->m_in_offset + << " m_out_offset:" << brgemm_copy_emitter->m_out_offset + << " m_comp_offset:" << brgemm_copy_emitter->m_comp_offset << "\n"; + }; + std::cerr << "Node name:" << detector_emitter->m_target_node_name << std::endl; + auto target_emitter = detector_emitter->m_target_emitter; + std::cerr << "Emitter type name:" << get_type_name(target_emitter) << std::endl; + if (auto *memory_emitter = dynamic_cast(target_emitter)) { + print_memory_emitter_info(memory_emitter); + } else if (auto *brgemm_emitter = dynamic_cast(target_emitter)) { + print_brgemm_emitter_info(brgemm_emitter); + } else if (auto *brgemm_copy_emitter = dynamic_cast(target_emitter)) { + print_brgemm_copy_emitter_info(brgemm_copy_emitter); + } +} + +#endif + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/utils.hpp b/src/plugins/intel_cpu/src/emitters/utils.hpp index af79d08c867d41..990a12ef5638d2 100644 --- a/src/plugins/intel_cpu/src/emitters/utils.hpp +++ b/src/plugins/intel_cpu/src/emitters/utils.hpp @@ -5,6 +5,13 @@ #pragma once #include +#include "x64/jit_emitter.hpp" +#include "x64/jit_snippets_emitters.hpp" +#include "x64/jit_segfault_detector_emitter.hpp" + +#ifndef _WIN32 +#include +#endif namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp index 492773809ce372..f27dae9520209f 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp @@ -243,7 +243,8 @@ bool intel_cpu::CPUGenerator::uses_precompiled_kernel(const std::shared_ptr(e) || std::dynamic_pointer_cast(e); #ifdef SNIPPETS_DEBUG_CAPS - need = need || 
target->custom_segfault_detector || + const auto cpu_target_machine = std::dynamic_pointer_cast(target); + need = need || (cpu_target_machine && cpu_target_machine->custom_segfault_detector) || std::dynamic_pointer_cast(e) || std::dynamic_pointer_cast(e) || std::dynamic_pointer_cast(e) || diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp index fa3528df6c9e6d..41f409aca8231a 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp @@ -30,6 +30,9 @@ class CPUTargetMachine : public snippets::TargetMachine { snippets::CompiledSnippetPtr get_snippet() override; size_t get_lanes() const override; dnnl::impl::cpu::x64::cpu_isa_t get_isa() const; +#ifdef SNIPPETS_DEBUG_CAPS + bool custom_segfault_detector = false; +#endif private: std::unique_ptr h; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.cpp index 420fb46d8f5d80..b36c118189286c 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.cpp @@ -192,13 +192,6 @@ void jit_convert_truncation_emitter::dword2int8(const std::vector &in_ve } } -#ifdef SNIPPETS_DEBUG_CAPS -void jit_convert_truncation_emitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "input_type:" << input_type << " output_type" << output_type << "\n"; -} -#endif - jit_convert_saturation_emitter::jit_convert_saturation_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr& node, ov::element::Type exec_prc) : jit_convert_emitter(host, host_isa, node, exec_prc) { @@ -338,12 +331,5 @@ size_t jit_convert_saturation_emitter::aux_vecs_count() const { return output_type == ov::element::u8 && host_isa_ == dnnl::impl::cpu::x64::avx512_core? 
1 : 0; } -#ifdef SNIPPETS_DEBUG_CAPS -void jit_convert_saturation_emitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "input_type:" << input_type << " output_type" << output_type << "\n"; -} -#endif - } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.hpp index 2fc5200e6bb765..908ed66f5f745b 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_conversion_emitters.hpp @@ -48,9 +48,6 @@ class jit_convert_truncation_emitter : public jit_convert_emitter { public: jit_convert_truncation_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, ov::element::Type exec_prc = ov::element::f32); -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, const std::vector& out) const override; @@ -72,9 +69,6 @@ class jit_convert_saturation_emitter : public jit_convert_emitter { public: jit_convert_saturation_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, ov::element::Type exec_prc = ov::element::f32); -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, const std::vector& out) const override; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.cpp index 41b6b01a9f49f2..2fa8206b0321d4 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.cpp @@ -54,10 +54,6 @@ size_t jit_dnnl_emitter::get_inputs_num() const { return 1; } void jit_dnnl_emitter::emit_code(const std::vector 
&in_vec_idxs, const std::vector &out_vec_idxs, const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { -#ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - build_debug_info(); -#endif if (host_isa_ == cpu::x64::sse41) { if (out_vec_idxs[0] != in_vec_idxs[0]) h->uni_vmovups(Xmm(out_vec_idxs[0]), Xmm(in_vec_idxs[0])); @@ -87,13 +83,6 @@ void jit_dnnl_emitter::emit_data() const { } } -#ifdef SNIPPETS_DEBUG_CAPS -void jit_dnnl_emitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "dnnl_alg_kind:" << kind << " alpha" << alpha << " beta" << beta << "\n"; -} -#endif - jit_dnnl_aux_emitter::jit_dnnl_aux_emitter(jit_generator *host, cpu_isa_t host_isa, dnnl_alg_kind_t algKind, float inpAlpha, float inpBeta, ov::element::Type exec_prc) diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.hpp index 085ea724f738c6..96cff2ac8441cb 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_dnnl_emitters.hpp @@ -21,9 +21,6 @@ class jit_dnnl_emitter : public jit_emitter { void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs) const override {}; static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif protected: jit_dnnl_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp index 7feb59a6a03b79..506a7a18f99f99 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.cpp @@ -771,6 +771,7 @@ void jit_power_dynamic_emitter::emit_isa(const std::vector 
&in_vec_idxs, h->add(h->rsp, n_gprs_to_save * gpr_size); } + /// EQUAL /// jit_equal_emitter::jit_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) { @@ -1686,13 +1687,6 @@ size_t jit_power_static_emitter::aux_vecs_count() const { return 1; } -#ifdef SNIPPETS_DEBUG_CAPS -void jit_power_static_emitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "power:" << power << " scale:" << scale << " shift:" << shift << "\n"; -} -#endif - /// PRELU /// jit_prelu_emitter::jit_prelu_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, @@ -2192,13 +2186,6 @@ void jit_is_inf_emitter::register_table_entries() { } } -#ifdef SNIPPETS_DEBUG_CAPS -void jit_is_inf_emitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "detect_negative:" << detect_negative << " detect_positive" << detect_positive << "\n"; -} -#endif - /// IS_NAN /// template <> void jit_is_nan_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp index c8ade8de5f8565..028dff4e519341 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_eltwise_emitters.hpp @@ -458,9 +458,7 @@ class jit_power_static_emitter : public jit_emitter { size_t get_inputs_num() const override; static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif + private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -607,9 +605,6 @@ class jit_is_inf_emitter : public jit_emitter { ov::element::Type execPrc = 
ov::element::f32): jit_emitter(host, hostIsa, execPrc) { prepare_table(); } -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif size_t get_inputs_num() const override { return 1; }; static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) { diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp index 09aec221979ae5..dbaafdde8124cc 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.cpp @@ -6,18 +6,13 @@ #include #include "utils/general_utils.h" -using namespace dnnl::impl; using namespace dnnl::impl::cpu; -using namespace dnnl::impl::cpu::x64; +using namespace dnnl::impl; using namespace Xbyak; namespace ov { namespace intel_cpu { -#ifdef SNIPPETS_DEBUG_CAPS -std::shared_ptr> g_custom_segfault_handler = std::make_shared>(); -#endif - size_t jit_emitter::get_max_vecs_count() const { return one_of(host_isa_, cpu::x64::avx512_core, cpu::x64::avx512_core) ? 
32 : 16; } @@ -213,35 +208,11 @@ void jit_emitter::emit_code(const std::vector &in_idxs, const std::vecto const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs) const { emitter_preamble(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs); -#ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - build_debug_info(); -#endif - emit_impl(in_idxs, out_idxs); emitter_postamble(); } -#ifdef SNIPPETS_DEBUG_CAPS -void jit_emitter::build_debug_info() const { - internal_call_preamble(); - - const auto &set_local_handler_overload = static_cast(set_local_handler); - h->mov(h->rax, reinterpret_cast(set_local_handler_overload)); - h->mov(abi_param1, reinterpret_cast(this)); - internal_call_rsp_align(); - h->call(h->rax); - internal_call_rsp_restore(); - - internal_call_postamble(); -} - -void jit_emitter::set_local_handler(jit_emitter* emitter_address) { - g_custom_segfault_handler->local() = emitter_address; -} -#endif - void jit_emitter::internal_call_preamble() const { // gprs Xbyak::Operand gprs_to_save[] = {h->r8, h->r9, h->r10, h->r11, h->r12, h->r13, h->r14, h->r15, diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp index 9d067ffb432414..66f681265b9fd9 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_emitter.hpp @@ -10,26 +10,12 @@ #include "snippets/snippets_isa.hpp" #include "snippets/generator.hpp" #include -#include - -#ifdef SNIPPETS_DEBUG_CAPS -#include "openvino/runtime/threading/thread_local.hpp" - -#ifndef _WIN32 -#include -#endif -using namespace ov::threading; -#endif +#include namespace ov { namespace intel_cpu { -#ifdef SNIPPETS_DEBUG_CAPS -class jit_emitter; -extern std::shared_ptr> g_custom_segfault_handler; -#endif - enum emitter_in_out_map { vec_to_vec, vec_to_gpr, @@ -48,9 +34,6 @@ class jit_emitter : public ov::snippets::Emitter { ov::element::Type exec_prc = ov::element::f32, 
emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_vec) : Emitter(), h(host), host_isa_(host_isa), exec_prc_(exec_prc), l_table (new Xbyak::Label()), in_out_type_(in_out_type) { k_mask = Xbyak::Opmask(1); // FIXME: in general case we need preserve k_mask state as well -#ifdef SNIPPETS_DEBUG_CAPS - m_custom_emitter_segfault_detector = false; -#endif } void emit_code(const std::vector &in_idxs, const std::vector &out_idxs, @@ -68,15 +51,6 @@ class jit_emitter : public ov::snippets::Emitter { */ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); -#ifdef SNIPPETS_DEBUG_CAPS - void set_custom_segfault_detector(const bool is_enable) override { - m_custom_emitter_segfault_detector = is_enable; - } - virtual void print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << std::endl; - } -#endif - protected: virtual size_t aux_gprs_count() const; @@ -157,23 +131,6 @@ class jit_emitter : public ov::snippets::Emitter { push_arg_entry_of(key, te.val, te.bcast); } } -#ifdef SNIPPETS_DEBUG_CAPS - bool m_custom_emitter_segfault_detector = false; - void build_debug_info() const; - static void set_local_handler(jit_emitter* emitter_address); - - std::string get_type_name(const jit_emitter* emitter) const { - std::string name = typeid(*emitter).name(); -#ifndef _WIN32 - int status; - std::unique_ptr demangled_name( - abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), - std::free); - name = demangled_name.get(); -#endif - return name; - } -#endif void internal_call_preamble() const; void internal_call_postamble() const; diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.cpp new file mode 100644 index 00000000000000..d83a74075eec92 --- /dev/null +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.cpp @@ -0,0 +1,82 @@ +// Copyright (C) 2023 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#ifdef SNIPPETS_DEBUG_CAPS + +#include "jit_segfault_detector_emitter.hpp" + +using namespace dnnl::impl::utils; +using namespace dnnl::impl; +using namespace dnnl::impl::cpu::x64; +using namespace Xbyak; + +namespace ov { +namespace intel_cpu { + +std::shared_ptr> g_custom_segfault_handler1 = + std::make_shared>(); + +jit_uni_segfault_detector_emitter::jit_uni_segfault_detector_emitter(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, + jit_emitter* target_emitter, bool is_load, bool is_store, std::string target_node_name) : + jit_emitter(host, host_isa), + m_target_emitter(target_emitter), + is_target_use_load_emitter(is_load), + is_target_use_store_emitter(is_store), + m_target_node_name(target_node_name) { +} + +size_t jit_uni_segfault_detector_emitter::get_inputs_num() const { return 1; } + +void jit_uni_segfault_detector_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { + save_target_emitter(); + if (is_target_use_load_emitter) { + memory_track(in_vec_idxs[0]); + } else if (is_target_use_store_emitter) { + memory_track(out_vec_idxs[0]); + } +} + +void jit_uni_segfault_detector_emitter::save_target_emitter() const { + // use internal call as "->local" should be the execution thread. Otherwise always compilation thread. 
+ internal_call_preamble(); + + const auto &set_local_handler_overload = static_cast(set_local_handler); + h->mov(h->rax, reinterpret_cast(set_local_handler_overload)); + h->mov(abi_param1, reinterpret_cast(this)); + internal_call_rsp_align(); + h->call(h->rax); + internal_call_rsp_restore(); + + internal_call_postamble(); +} + +void jit_uni_segfault_detector_emitter::set_local_handler(jit_uni_segfault_detector_emitter* emitter_address) { + g_custom_segfault_handler1->local() = emitter_address; +} + +void jit_uni_segfault_detector_emitter::memory_track(size_t gpr_idx_for_mem_address) const { + h->push(h->r15); + Xbyak::Label label_set_address_current; + Xbyak::Label label_set_address_end; + h->mov(h->r15, reinterpret_cast(&start_address)); + h->cmp(h->qword[h->r15], 0); + h->jne(label_set_address_current); + h->mov(h->qword[h->r15], Xbyak::Reg64(gpr_idx_for_mem_address)); + h->jmp(label_set_address_end); + h->L(label_set_address_current); + { + h->mov(h->r15, reinterpret_cast(&current_address)); + h->mov(h->qword[h->r15], Xbyak::Reg64(gpr_idx_for_mem_address)); + } + h->L(label_set_address_end); + // iteration++, 1 means first access + h->mov(h->r15, reinterpret_cast(&iteration)); + h->add(h->qword[h->r15], 0x01); + h->pop(h->r15); +} + +} // namespace intel_cpu +} // namespace ov + +#endif diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.hpp new file mode 100644 index 00000000000000..d9637cacc81d47 --- /dev/null +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_segfault_detector_emitter.hpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef SNIPPETS_DEBUG_CAPS + +#pragma once + +#include +#include "jit_emitter.hpp" +#include "openvino/runtime/threading/thread_local.hpp" + +#ifndef _WIN32 +#include +#endif + +using namespace ov::threading; + +namespace ov { +namespace intel_cpu { + +class 
jit_uni_segfault_detector_emitter; +extern std::shared_ptr> g_custom_segfault_handler1; + +class jit_uni_segfault_detector_emitter : public jit_emitter { +public: + jit_uni_segfault_detector_emitter(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, + jit_emitter* target_emitter, bool is_load, bool is_store, std::string target_node_name); + + size_t get_inputs_num() const override; + + friend void print_segfault_detector_result(jit_uni_segfault_detector_emitter* detector_emitter); + +private: + // emit code is to save "this" pointer(jit_uni_segfault_detector_emitter) to global handler, then print info w/ its target_emitter. + // and to save tracked memory address, iteration, etc to print + void emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const override; + jit_emitter *m_target_emitter = nullptr; + bool is_target_use_load_emitter = false; + bool is_target_use_store_emitter = false; + std::string m_target_node_name = ""; + + void save_target_emitter() const; + static void set_local_handler(jit_uni_segfault_detector_emitter* emitter_address); + void memory_track(size_t gpr_idx_for_mem_address) const; + + mutable size_t start_address = 0; + mutable size_t current_address = 0; + mutable size_t iteration = 0; +}; + +} // namespace intel_cpu +} // namespace ov + +#endif diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp index bbfe2e510e6a6c..e70a63bb093f42 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.cpp @@ -10,6 +10,8 @@ #include "snippets/utils.hpp" #include "snippets/lowered/expression.hpp" #include "snippets/lowered/port_connector.hpp" +#include "transformations/snippets/x64/op/brgemm_copy_b.hpp" +#include "transformations/snippets/x64/op//brgemm_cpu.hpp" #include "snippets/op/rank_normalization.hpp" using 
namespace InferenceEngine; @@ -197,26 +199,24 @@ KernelEmitter::KernelEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPt gpr_map_pool.second.push_back(reg_indexes_idx); gpr_map_pool.second.push_back(reg_const_params_idx); map_abstract_registers(gpr_map_pool, vec_map_pool, general_exprs); +#ifdef SNIPPETS_DEBUG_CAPS + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, false, false, kernel->get_friendly_name())); +#endif } void KernelEmitter::emit_code(const std::vector &in, const std::vector &out) const { validate_arguments(in, out); #ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - build_debug_info(); + if (segfault_detector_emitter != nullptr) { + segfault_detector_emitter->emit_code(in, out); + } #endif emit_impl(in, out); } -#ifdef SNIPPETS_DEBUG_CAPS -void KernelEmitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "where num_inputs:" << num_inputs << " num_outputs:" << num_outputs << " num_unique_buffers:" << num_unique_buffers - << " reg_indexes_idx:" << reg_indexes_idx << " reg_const_params_idx:" << reg_const_params_idx << "\n"; -} -#endif - void KernelEmitter::validate_arguments(const std::vector &in, const std::vector &out) const { if (!in.empty()) @@ -357,20 +357,9 @@ LoopBeginEmitter::LoopBeginEmitter(jit_generator* h, cpu_isa_t isa, const Expres in_out_type_ = emitter_in_out_map::gpr_to_gpr; } -#ifdef SNIPPETS_DEBUG_CAPS -void LoopBeginEmitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "where evaluate_once:" << evaluate_once << " work_amount:" << work_amount << "\n"; -} -#endif - void LoopBeginEmitter::emit_code(const std::vector &in, const std::vector &out) const { validate_arguments(in, out); -#ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - 
build_debug_info(); -#endif emit_impl(in, out); } @@ -417,21 +406,9 @@ LoopEndEmitter::LoopEndEmitter(jit_generator* h, cpu_isa_t isa, const Expression in_out_type_ = emitter_in_out_map::gpr_to_gpr; } -#ifdef SNIPPETS_DEBUG_CAPS -void LoopEndEmitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "where num_inputs:" << num_inputs << " num_outputs:" << num_outputs - << " wa_increment:" << wa_increment << " work_amount:" << work_amount << " evaluate_once:" << evaluate_once << "\n"; -} -#endif - void LoopEndEmitter::emit_code(const std::vector &in, const std::vector &out) const { validate_arguments(in, out); -#ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - build_debug_info(); -#endif emit_impl(in, out); } @@ -526,13 +503,6 @@ void BroadcastMoveEmitter::emit_isa(const std::vector &in, const std::ve } } -#ifdef SNIPPETS_DEBUG_CAPS -void BroadcastMoveEmitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "where byte_size:" << byte_size << "\n"; -} -#endif - ScalarEmitter::ScalarEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : jit_emitter(h, isa) { const auto n = expr->get_node(); const auto& precision = n->get_output_element_type(0); @@ -574,56 +544,12 @@ void ScalarEmitter::emit_isa(const std::vector &in, const std::vectoruni_vbroadcastss(vmm_dst, table_val("scalar")); } -#ifdef SNIPPETS_DEBUG_CAPS -void ScalarEmitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "where value:" << value << "\n"; -} -#endif - MemoryEmitter::MemoryEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : jit_emitter(h, isa) { const auto n = expr->get_node(); src_prc = n->get_input_element_type(0); dst_prc = n->get_output_element_type(0); } -#ifdef SNIPPETS_DEBUG_CAPS -void MemoryEmitter::memory_track(size_t gpr_idx_for_mem_address) const { - 
h->push(h->r15); - h->push(h->r14); - Xbyak::Label label_set_address_current; - Xbyak::Label label_set_address_end; - h->mov(h->r15, reinterpret_cast(&start_address)); - h->cmp(h->qword[h->r15], 0); - h->jne(label_set_address_current); - h->mov(h->r14, reinterpret_cast(&byte_offset)); - h->mov(h->r14, h->qword[h->r14]); - h->add(h->r14, Xbyak::Reg64(gpr_idx_for_mem_address)); - h->mov(h->qword[h->r15], h->r14); - h->jmp(label_set_address_end); - h->L(label_set_address_current); - { - h->mov(h->r15, reinterpret_cast(&current_address)); - h->mov(h->r14, reinterpret_cast(&byte_offset)); - h->mov(h->r14, h->qword[h->r14]); - h->add(h->r14, Xbyak::Reg64(gpr_idx_for_mem_address)); - h->mov(h->qword[h->r15], h->r14); - } - h->L(label_set_address_end); - // iteration++, 1 means first access - h->mov(h->r15, reinterpret_cast(&iteration)); - h->add(h->qword[h->r15], 0x01); - h->pop(h->r14); - h->pop(h->r15); -} - -void MemoryEmitter::print_debug_info() const { - std::cerr << "Emitter type name:" << get_type_name(this) << std::endl; - std::cerr << "src precision:" << src_prc << ", dst precision:" << dst_prc << ", load/store element number:" << count << std::endl; - std::cerr << "start_address:" << start_address << ", current_address:" << current_address << ", iteration:" << iteration << "\n"; -} -#endif - StoreEmitter::StoreEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : MemoryEmitter(h, isa, expr) { if (src_prc != dst_prc) OPENVINO_THROW("StoreEmitter supports only equal input and output types but gets: ", @@ -634,7 +560,9 @@ StoreEmitter::StoreEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& const auto store = ov::as_type_ptr(expr->get_node()); OPENVINO_ASSERT(store, "Node in expression is not snippets::op::Store in constructor of StoreEmitter!"); #ifdef SNIPPETS_DEBUG_CAPS - store_node = store; + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new
jit_uni_segfault_detector_emitter(h, isa, this, false, true, store->get_friendly_name())); #endif count = store->get_count(); byte_offset = store->get_offset(); @@ -645,8 +573,8 @@ StoreEmitter::StoreEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& void StoreEmitter::emit_impl(const std::vector& in, const std::vector& out) const { #ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - memory_track(out[0]); + if (segfault_detector_emitter != nullptr) + segfault_detector_emitter->emit_code(in, out); #endif if (host_isa_ == dnnl::impl::cpu::x64::sse41) { emit_isa(in, out); @@ -670,13 +598,6 @@ void StoreEmitter::emit_data() const { store_emitter->emit_data(); } -#ifdef SNIPPETS_DEBUG_CAPS -void StoreEmitter::print_debug_info() const { - std::cerr << "Node name:" << store_node->get_friendly_name() << std::endl; - MemoryEmitter::print_debug_info(); -} -#endif - LoadEmitter::LoadEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : MemoryEmitter(h, isa, expr) { if (src_prc != dst_prc) OPENVINO_THROW("LoadEmitter supports only equal input and output types but gets: ", @@ -686,7 +607,9 @@ LoadEmitter::LoadEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& e const auto load = std::dynamic_pointer_cast(expr->get_node()); OPENVINO_ASSERT(load, "Node in expression is not snippets::op::Load in constructor of LoadEmitter!"); #ifdef SNIPPETS_DEBUG_CAPS - load_node = load; + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, true, false, load->get_friendly_name())); #endif count = load->get_count(); byte_offset = load->get_offset(); @@ -697,8 +620,8 @@ LoadEmitter::LoadEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& e void LoadEmitter::emit_impl(const std::vector& in, const std::vector& out) const { #ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - memory_track(in[0]); + if 
(segfault_detector_emitter != nullptr) + segfault_detector_emitter->emit_code(in, out); #endif if (host_isa_ == dnnl::impl::cpu::x64::sse41) { emit_isa(in, out); @@ -722,13 +645,6 @@ void LoadEmitter::emit_data() const { load_emitter->emit_data(); } -#ifdef SNIPPETS_DEBUG_CAPS -void LoadEmitter::print_debug_info() const { - std::cerr << "Node name:" << load_node->get_friendly_name() << std::endl; - MemoryEmitter::print_debug_info(); -} -#endif - BroadcastLoadEmitter::BroadcastLoadEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : MemoryEmitter(h, isa, expr) { if (src_prc != dst_prc) @@ -737,10 +653,12 @@ BroadcastLoadEmitter::BroadcastLoadEmitter(jit_generator* h, cpu_isa_t isa, cons " and ", dst_prc.get_type_name()); - auto broadcast_load = std::dynamic_pointer_cast(expr->get_node()); + const auto broadcast_load = std::dynamic_pointer_cast(expr->get_node()); OPENVINO_ASSERT(broadcast_load, "Node in expression is not snippets::op::BroadcastLoad in constructor of BroadcastLoadEmitter!"); #ifdef SNIPPETS_DEBUG_CAPS - broadcast_load_node = broadcast_load; + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, true, false, broadcast_load->get_friendly_name())); #endif byte_offset = broadcast_load->get_offset(); in_out_type_ = emitter_in_out_map::gpr_to_vec; @@ -749,8 +667,8 @@ BroadcastLoadEmitter::BroadcastLoadEmitter(jit_generator* h, cpu_isa_t isa, cons void BroadcastLoadEmitter::emit_impl(const std::vector& in, const std::vector& out) const { #ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - memory_track(in[0]); + if (segfault_detector_emitter != nullptr) + segfault_detector_emitter->emit_code(in, out); #endif if (host_isa_ == dnnl::impl::cpu::x64::sse41) { emit_isa(in, out); @@ -780,22 +698,16 @@ void BroadcastLoadEmitter::emit_isa(const std::vector &in, const std::ve } } -#ifdef SNIPPETS_DEBUG_CAPS -void 
BroadcastLoadEmitter::print_debug_info() const { - std::cerr << "Node name:" << broadcast_load_node->get_friendly_name() << std::endl; - MemoryEmitter::print_debug_info(); -} -#endif - LoadConvertEmitter::LoadConvertEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : MemoryEmitter(h, isa, expr) { - auto load_convert = ov::as_type_ptr(expr->get_node()); - OPENVINO_ASSERT(load_convert, "Node in expression can not dynamic cast to snippets::op::Load in constructor of LoadConvertEmitter!"); + const auto load = ov::as_type_ptr(expr->get_node()); + count = load->get_count(); + byte_offset = load->get_offset(); #ifdef SNIPPETS_DEBUG_CAPS - load_convert_node = load_convert; + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, true, false, load->get_friendly_name())); #endif - count = load_convert->get_count(); - byte_offset = load_convert->get_offset(); in_out_type_ = emitter_in_out_map::gpr_to_vec; load_emitter.reset(new jit_load_emitter(h, isa, src_prc, dst_prc, count)); } @@ -803,8 +715,8 @@ LoadConvertEmitter::LoadConvertEmitter(jit_generator* h, cpu_isa_t isa, const Ex void LoadConvertEmitter::emit_impl(const std::vector& in, const std::vector& out) const { #ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - memory_track(in[0]); + if (segfault_detector_emitter != nullptr) + segfault_detector_emitter->emit_code(in, out); #endif if (host_isa_ == dnnl::impl::cpu::x64::sse41) { emit_isa(in, out); @@ -828,22 +740,16 @@ void LoadConvertEmitter::emit_data() const { load_emitter->emit_data(); } -#ifdef SNIPPETS_DEBUG_CAPS -void LoadConvertEmitter::print_debug_info() const { - std::cerr << "Node name:" << load_convert_node->get_friendly_name() << std::endl; - MemoryEmitter::print_debug_info(); -} -#endif - StoreConvertEmitter::StoreConvertEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : MemoryEmitter(h, isa, 
expr) { - auto store_convert = ov::as_type_ptr(expr->get_node()); - OPENVINO_ASSERT(store_convert, "Node in expression can not dynamic cast to snippets::op::Store in constructor of StoreConvertEmitter!"); + const auto store = ov::as_type_ptr(expr->get_node()); + count = store->get_count(); + byte_offset = store->get_offset(); #ifdef SNIPPETS_DEBUG_CAPS - store_convert_node = store_convert; + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, false, true, store->get_friendly_name())); #endif - count = store_convert->get_count(); - byte_offset = store_convert->get_offset(); in_out_type_ = emitter_in_out_map::vec_to_gpr; if (ov::is_type(expr->get_node())) { @@ -856,8 +762,8 @@ StoreConvertEmitter::StoreConvertEmitter(jit_generator* h, cpu_isa_t isa, const void StoreConvertEmitter::emit_impl(const std::vector& in, const std::vector& out) const { #ifdef SNIPPETS_DEBUG_CAPS - if (m_custom_emitter_segfault_detector) - memory_track(out[0]); + if (segfault_detector_emitter != nullptr) + segfault_detector_emitter->emit_code(in, out); #endif if (host_isa_ == dnnl::impl::cpu::x64::sse41) { emit_isa(in, out); @@ -881,14 +787,6 @@ void StoreConvertEmitter::emit_data() const { store_emitter->emit_data(); } -#ifdef SNIPPETS_DEBUG_CAPS -void StoreConvertEmitter::print_debug_info() const { - std::cerr << "Node name:" << store_convert_node->get_friendly_name() << std::endl; - MemoryEmitter::print_debug_info(); -} -#endif - - size_t BrgemmEmitter::get_in_leading_dim(const VectorDims& shape, const std::vector& layout) { // Input shape is original, so we need to correctly read this data by order // Example: @@ -918,11 +816,12 @@ size_t BrgemmEmitter::get_out_leading_dim(const VectorDims& shape, const std::ve BrgemmEmitter::BrgemmEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : jit_emitter(h, isa) { in_out_type_ = emitter_in_out_map::gpr_to_gpr; - auto 
brgemm_node = as_type_ptr(expr->get_node()); - OPENVINO_ASSERT(brgemm_node, "Node in expression is not ov::intel_cpu::BrgemmCPU in constructor of BrgemmEmitter!"); + const auto& brgemm_node = as_type_ptr(expr->get_node()); OPENVINO_ASSERT(!brgemm_node->is_dynamic(), "Snippets don't support code generation for dynamic Brgemm"); #ifdef SNIPPETS_DEBUG_CAPS - brgemm_node_ptr = brgemm_node; + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, false, false, brgemm_node->get_friendly_name())); #endif std::vector leading_dimensions; @@ -1051,6 +950,10 @@ void BrgemmEmitter::initBrgemm(brgemmCtx& ctx, std::unique_ptr& void BrgemmEmitter::emit_impl(const std::vector& in, const std::vector& out) const { validate_arguments(in, out); +#ifdef SNIPPETS_DEBUG_CAPS + if (segfault_detector_emitter != nullptr) + segfault_detector_emitter->emit_code(in, out); +#endif if (host_isa_ == cpu::x64::avx512_core) { Xbyak::Reg64 input_0(static_cast(in[0])); Xbyak::Reg64 input_1(static_cast(in[1])); @@ -1188,27 +1091,16 @@ void BrgemmEmitter::kernel_execute(const brgemm_kernel_t *brg_kernel, (*brg_kernel)(&brgemm_p); } -#ifdef SNIPPETS_DEBUG_CAPS -void BrgemmEmitter::print_debug_info() const { - std::cerr << "Node name:" << brgemm_node_ptr->get_friendly_name() << std::endl; - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "where m_brgCtx.M:" << m_brgCtx.M << " m_brgCtx.K:" << m_brgCtx.K << " m_brgCtx.N:" << m_brgCtx.N - << " m_brgCtx.LDA:" << m_brgCtx.LDA << " m_brgCtx.LDB:" << m_brgCtx.LDB << " m_brgCtx.LDC:" << m_brgCtx.LDC - << " m_brgCtx.dt_in0:" << m_brgCtx.dt_in0 << " m_brgCtx.dt_in1:" << m_brgCtx.dt_in1 << " m_brgCtx.palette:" << m_brgCtx.palette - << " m_brgCtx.is_with_amx:" << m_brgCtx.is_with_amx << " m_brgCtx.is_with_comp:" << m_brgCtx.is_with_comp << " m_brgCtx.beta:" << m_brgCtx.beta - << " m_load_offset_a:" << m_load_offset_a << " 
m_load_offset_b:" << m_load_offset_b << " m_load_offset_scratch:" << m_load_offset_scratch - << " m_store_offset_c:" << m_store_offset_c - << " m_with_scratch:" << m_with_scratch << " m_with_comp:" << m_with_comp << "\n"; -} -#endif - BrgemmCopyBEmitter::BrgemmCopyBEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : jit_emitter(h, isa) { in_out_type_ = emitter_in_out_map::gpr_to_gpr; - auto brgemm_repack = ov::as_type_ptr(expr->get_node()); - OPENVINO_ASSERT(brgemm_repack, "Node in expression is not ov::intel_cpu::BrgemmCopyB in constructor of BrgemmEmitter!"); + const auto brgemm_repack = ov::as_type_ptr(expr->get_node()); + if (!brgemm_repack) + OPENVINO_THROW("BrgemmCopyBEmitters expects BrgemmCopyB node"); #ifdef SNIPPETS_DEBUG_CAPS - brgemm_repack_node = brgemm_repack; + DebugCapsConfig debugCaps; + if (!debugCaps.snippets_segfault_detector.empty()) + segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, false, false, brgemm_repack->get_friendly_name())); #endif m_brgemm_prc_in0 = brgemm_repack->get_src_element_type(); @@ -1287,6 +1179,10 @@ void BrgemmCopyBEmitter::init_brgemm_copy(std::unique_ptr& in, const std::vector& out) const { +#ifdef SNIPPETS_DEBUG_CAPS + if (segfault_detector_emitter != nullptr) + segfault_detector_emitter->emit_code(in, out); +#endif if (host_isa_ == cpu::x64::avx512_core) { Xbyak::Reg64 src(static_cast(in[0])); Xbyak::Reg64 dst(static_cast(out[0])); @@ -1400,18 +1296,6 @@ void BrgemmCopyBEmitter::execute(matmul::jit_brgemm_matmul_copy_b_t *kernel, con (*kernel)(&ctx); } -#ifdef SNIPPETS_DEBUG_CAPS -void BrgemmCopyBEmitter::print_debug_info() const { - std::cerr << "Node name:" << brgemm_repack_node->get_friendly_name() << std::endl; - std::cerr << "Emitter type name:" << get_type_name(this) << "\n"; - std::cerr << "where m_LDB:" << m_LDB << " m_K:" << m_K << " m_K_blk:" << m_K_blk << " m_K_tail:" << m_K_tail - << " m_N:" << m_N << " m_N_blk:" << m_N_blk << " m_N_tail:" << m_N_tail - 
<< " m_brgemm_prc_in0:" << m_brgemm_prc_in0 << " m_brgemm_prc_in1:" << m_brgemm_prc_in1 - << " m_brgemmVNNIFactor:" << m_brgemmVNNIFactor << " m_with_comp:" << m_with_comp - << " m_in_offset:" << m_in_offset << " m_out_offset:" << m_out_offset << " m_comp_offset:" << m_comp_offset << "\n"; -} -#endif - HorizonEmitter::HorizonEmitter(jit_generator* h, cpu_isa_t isa, const ExpressionPtr& expr) : jit_emitter(h, isa, ov::element::f32, emitter_in_out_map::vec_to_vec) { if (ov::is_type(expr->get_node())) { diff --git a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp index fc806d955980fc..647e4625c5b7e7 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/jit_snippets_emitters.hpp @@ -14,14 +14,14 @@ #include "jit_load_store_emitters.hpp" #include "transformations/snippets/x64/op/store_convert.hpp" -#include "transformations/snippets/x64/op/brgemm_copy_b.hpp" -#include "transformations/snippets/x64/op//brgemm_cpu.hpp" // Matmul support: #include #include #include #include +#include "jit_segfault_detector_emitter.hpp" + namespace ov { namespace intel_cpu { @@ -84,9 +84,6 @@ class KernelEmitter : public jit_container_emitter { size_t get_inputs_num() const override {return 0;} void emit_code(const std::vector &in, const std::vector &out) const; -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: using jit_emitter::emit_code; @@ -115,6 +112,9 @@ class KernelEmitter : public jit_container_emitter { const size_t reg_indexes_idx; const size_t reg_const_params_idx; +#ifdef SNIPPETS_DEBUG_CAPS + std::shared_ptr segfault_detector_emitter = nullptr; +#endif }; class LoopBeginEmitter : public jit_emitter { @@ -126,9 +126,6 @@ class LoopBeginEmitter : public jit_emitter { const std::vector &out) const; // todo: it is purely virtual in the base class, but do we need it? 
size_t get_inputs_num() const override {return 0;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: using jit_emitter::emit_code; @@ -151,9 +148,6 @@ class LoopEndEmitter : public jit_emitter { const std::vector &out) const; // todo: it is purely virtual in the base class, but do we need it? size_t get_inputs_num() const override {return 0;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: using jit_emitter::emit_code; @@ -215,9 +209,6 @@ class BroadcastMoveEmitter : public jit_emitter { const ov::snippets::lowered::ExpressionPtr& expr); size_t get_inputs_num() const override {return 1;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, @@ -237,9 +228,6 @@ class ScalarEmitter : public jit_emitter { const ov::snippets::lowered::ExpressionPtr& expr); size_t get_inputs_num() const override {return 0;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif protected: size_t aux_gprs_count() const override {return 1;} @@ -270,7 +258,7 @@ class MemoryEmitter : public jit_emitter { dnnl::impl::cpu::x64::cpu_isa_t isa, const ov::snippets::lowered::ExpressionPtr& expr); #ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; + friend void print_segfault_detector_result(jit_uni_segfault_detector_emitter* detector_emitter); #endif protected: @@ -281,13 +269,7 @@ class MemoryEmitter : public jit_emitter { size_t byte_offset = 0; #ifdef SNIPPETS_DEBUG_CAPS - // MemoryEmitter is to move data between memory and registers. Typically it's performed many times to operate the whole subtensor. - // memory_track function is to record start subtensor address and current subtensor address and iteration. - // If segfault happens, build_in segfault capabilty will give developers these info to understand what is wrong. 
- mutable size_t start_address = 0; - mutable size_t current_address = 0; - mutable size_t iteration = 0; - void memory_track(size_t gpr_idx_for_mem_address) const; + std::shared_ptr segfault_detector_emitter = nullptr; #endif }; @@ -298,9 +280,6 @@ class StoreEmitter : public MemoryEmitter { const ov::snippets::lowered::ExpressionPtr& expr); size_t get_inputs_num() const override {return 1;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, @@ -312,9 +291,6 @@ class StoreEmitter : public MemoryEmitter { private: std::unique_ptr store_emitter = nullptr; -#ifdef SNIPPETS_DEBUG_CAPS - std::shared_ptr store_node = nullptr; -#endif }; class LoadEmitter : public MemoryEmitter { @@ -324,9 +300,6 @@ class LoadEmitter : public MemoryEmitter { const ov::snippets::lowered::ExpressionPtr& expr); size_t get_inputs_num() const override {return 0;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, @@ -338,9 +311,6 @@ class LoadEmitter : public MemoryEmitter { private: std::unique_ptr load_emitter = nullptr; -#ifdef SNIPPETS_DEBUG_CAPS - std::shared_ptr load_node = nullptr; -#endif }; class BroadcastLoadEmitter : public MemoryEmitter { @@ -350,9 +320,6 @@ class BroadcastLoadEmitter : public MemoryEmitter { const ov::snippets::lowered::ExpressionPtr& expr); size_t get_inputs_num() const override {return 0;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, @@ -360,9 +327,6 @@ class BroadcastLoadEmitter : public MemoryEmitter { template void emit_isa(const std::vector &in, const std::vector &out) const; -#ifdef SNIPPETS_DEBUG_CAPS - std::shared_ptr broadcast_load_node = nullptr; -#endif }; class LoadConvertEmitter : public MemoryEmitter { @@ -372,9 +336,6 @@ class LoadConvertEmitter : public MemoryEmitter { const ov::snippets::lowered::ExpressionPtr& expr); 
size_t get_inputs_num() const override {return 0;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, @@ -386,9 +347,6 @@ class LoadConvertEmitter : public MemoryEmitter { private: std::unique_ptr load_emitter = nullptr; -#ifdef SNIPPETS_DEBUG_CAPS - std::shared_ptr load_convert_node = nullptr; -#endif }; class StoreConvertEmitter : public MemoryEmitter { @@ -398,9 +356,6 @@ class StoreConvertEmitter : public MemoryEmitter { const ov::snippets::lowered::ExpressionPtr& expr); size_t get_inputs_num() const override {return 1;} -#ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; -#endif private: void emit_impl(const std::vector& in, @@ -412,9 +367,6 @@ class StoreConvertEmitter : public MemoryEmitter { private: std::unique_ptr store_emitter = nullptr; -#ifdef SNIPPETS_DEBUG_CAPS - std::shared_ptr store_convert_node = nullptr; -#endif }; class BrgemmEmitter : public jit_emitter { @@ -426,7 +378,7 @@ class BrgemmEmitter : public jit_emitter { size_t get_inputs_num() const override { return m_with_scratch ? 
3 : 2; } static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); #ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; + friend void print_segfault_detector_result(jit_uni_segfault_detector_emitter* detector_emitter); #endif static size_t get_in_leading_dim(const VectorDims& shape, const std::vector& layout); @@ -456,7 +408,7 @@ class BrgemmEmitter : public jit_emitter { size_t in2_kernel_offset = 0, size_t out0_kernel_offset = 0) const; static void kernel_execute(const dnnl::impl::cpu::x64::brgemm_kernel_t *brg_kernel, const void *A, const void *B, void *C, void *scratch, int with_comp); #ifdef SNIPPETS_DEBUG_CAPS - std::shared_ptr brgemm_node_ptr = nullptr; + std::shared_ptr segfault_detector_emitter = nullptr; #endif brgemmCtx m_brgCtx; @@ -484,7 +436,7 @@ class BrgemmCopyBEmitter : public jit_emitter { return {{element::i8}, {element::bf16}}; } #ifdef SNIPPETS_DEBUG_CAPS - void print_debug_info() const override; + friend void print_segfault_detector_result(jit_uni_segfault_detector_emitter* detector_emitter); #endif private: @@ -503,7 +455,7 @@ class BrgemmCopyBEmitter : public jit_emitter { std::unique_ptr m_kernel; #ifdef SNIPPETS_DEBUG_CAPS - std::shared_ptr brgemm_repack_node = nullptr; + std::shared_ptr segfault_detector_emitter = nullptr; #endif ov::element::Type m_brgemm_prc_in0, m_brgemm_prc_in1; diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index be9c08dcca433e..5ce49cfdce91f6 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -38,7 +38,11 @@ #include "snippets/pass/hash.hpp" #include "snippets/lowered/linear_ir.hpp" + +#if defined(__linux__) && defined(SNIPPETS_DEBUG_CAPS) #include +std::mutex err_print_lock; +#endif using namespace InferenceEngine; using namespace dnnl::impl::utils; @@ -51,10 +55,6 @@ namespace intel_cpu { namespace node { namespace { -#if defined(__linux__) && 
defined(SNIPPETS_DEBUG_CAPS) -std::mutex err_print_lock; -#endif - struct SnippetKey { Snippet::SnippetAttrs attrs; @@ -522,8 +522,8 @@ void Snippet::SnippetJitExecutor::segfault_detector() { if (enable_segfault_detector) { __sighandler_t signal_handler = [](int signal) { std::lock_guard guard(err_print_lock); - if (auto err_emitter = ov::intel_cpu::g_custom_segfault_handler->local()) - err_emitter->print_debug_info(); + if (auto segfault_detector_emitter = ov::intel_cpu::g_custom_segfault_handler1->local()) + print_segfault_detector_result(segfault_detector_emitter); auto tid = parallel_get_thread_num(); OPENVINO_THROW("Segfault was caught by the signal handler in subgraph node execution on thread " + std::to_string(tid)); }; diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.h b/src/plugins/intel_cpu/src/nodes/subgraph.h index 9689d348b9c7c5..b16410c26a8249 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.h +++ b/src/plugins/intel_cpu/src/nodes/subgraph.h @@ -9,6 +9,7 @@ #include #include #include "emitters/x64/jit_snippets_emitters.hpp" +#include "emitters/x64/jit_segfault_detector_emitter.hpp" #include #include "snippets/op/subgraph.hpp"