Skip to content

Commit

Permalink
separate verbose print and segfault detector, decorator jit_debug_emi…
Browse files Browse the repository at this point in the history
…tter for scalable
  • Loading branch information
chenhu-wang committed Jan 11, 2024
1 parent 8b17c19 commit 6973772
Show file tree
Hide file tree
Showing 17 changed files with 522 additions and 232 deletions.
28 changes: 20 additions & 8 deletions src/plugins/intel_cpu/src/emitters/plugin/x64/jit_emitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@

#include <set>

#ifdef SNIPPETS_DEBUG_CAPS
#include "emitters/verbose.hpp"
#endif

namespace ov {
namespace intel_cpu {

Expand All @@ -29,6 +33,10 @@ struct emitter_params {
};

class jit_emitter : public ov::snippets::Emitter {
#ifdef SNIPPETS_DEBUG_CAPS
friend class jit_debug_emitter;
#endif

public:
jit_emitter(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
ov::element::Type exec_prc = ov::element::f32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_vec)
Expand All @@ -51,6 +59,14 @@ class jit_emitter : public ov::snippets::Emitter {
*/
static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);

#ifdef SNIPPETS_DEBUG_CAPS
/**
 * @brief Returns the C string held by info_.
 * info_ is initialized from this emitter the first time it is requested
 * (it is a mutable member, so this works from a const method).
 */
const char *info() const {
    const bool needs_init = !info_.is_initialized();
    if (needs_init) {
        info_.init(this);
    }
    return info_.c_str();
}
#endif

protected:
virtual size_t aux_gprs_count() const;

Expand Down Expand Up @@ -124,21 +140,17 @@ class jit_emitter : public ov::snippets::Emitter {
entry_map_.insert(std::make_pair(key, te));
}

void push_entries_of(const table_t &t) {
for (auto it = t.begin(); it != t.end(); it++) {
auto key = (*it).first;
auto te = (*it).second; // copy values from table
push_arg_entry_of(key, te.val, te.bcast);
}
}

void internal_call_preamble() const;
void internal_call_postamble() const;
// align stack on a 16-byte boundary, as the ABI requires
// callee is responsible for saving and restoring rbx. rbx must not be changed after calling the callee.
void internal_call_rsp_align() const;
void internal_call_rsp_restore() const;

#ifdef SNIPPETS_DEBUG_CAPS
mutable jit_emitter_info_t info_;
#endif

private:
mutable std::vector<size_t> preserved_vec_idxs;
mutable std::vector<size_t> preserved_gpr_idxs;
Expand Down
42 changes: 42 additions & 0 deletions src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@
#ifdef SNIPPETS_DEBUG_CAPS
#include "emitters/snippets/x64/jit_perf_count_chrono_emitters.hpp"
#include "emitters/snippets/x64/jit_perf_count_rdtsc_emitters.hpp"
#include "emitters/snippets/x64/jit_debug_emitter.hpp"
#include "emitters/snippets/x64/jit_segfault_detector_emitter.hpp"
#include "transformations/snippets/x64/op/perf_count_rdtsc.hpp"
#endif

namespace ov {

#ifndef SNIPPETS_DEBUG_CAPS
#define CREATE_SNIPPETS_EMITTER(e_type) { \
[this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> { \
return std::make_shared<e_type>(h.get(), isa, expr); \
Expand All @@ -46,6 +49,45 @@ namespace ov {
return e_type::get_supported_precisions(n); \
} \
}
#else
// SNIPPETS_DEBUG_CAPS variant of CREATE_SNIPPETS_EMITTER.
// Expands to a pair of lambdas: an emitter factory and a supported-precisions query.
// When custom_segfault_detector is set, the factory wraps selected emitter types
// (matched by substring on the stringified e_type) into a jit_debug_emitter decorated
// with a jit_uni_segfault_detector_emitter:
//   - load emitters                      -> detector flags (true,  false)
//   - store emitters                     -> detector flags (false, true)
//   - brgemm / brgemm_copy_b / kernel    -> detector flags (false, false)
// Every other emitter type, and every type when the detector is off, is returned as is.
#define CREATE_SNIPPETS_EMITTER(e_type) { \
    [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> { \
        auto emitter = std::make_shared<e_type>(h.get(), isa, expr); \
        /* detector disabled: hand back the plain emitter */ \
        if (!custom_segfault_detector) \
            return emitter; \
        const std::string type = #e_type; \
        const auto type_has = [&type](const char* name) { \
            return type.find(name) != std::string::npos; \
        }; \
        /* categories are checked in priority order: load, then store, then the remaining tracked emitters */ \
        const bool is_load = type_has("jit_load_memory_emitter") || \
                             type_has("jit_load_broadcast_emitter") || \
                             type_has("jit_load_convert_emitter"); \
        const bool is_store = !is_load && \
                              (type_has("jit_store_memory_emitter") || \
                               type_has("jit_store_convert_emitter")); \
        const bool is_other_tracked = !is_load && !is_store && \
                                      (type_has("jit_brgemm_emitter") || \
                                       type_has("jit_brgemm_copy_b_emitter") || \
                                       type_has("jit_kernel_emitter")); \
        /* To enable the segfault detector for all other, non-typical memory access emitters as well, */ \
        /* drop this early return so that they are wrapped below with (false, false) flags. */ \
        if (!(is_load || is_store || is_other_tracked)) \
            return emitter; \
        auto segfault_emitter = std::make_shared<jit_uni_segfault_detector_emitter>( \
            h.get(), isa, emitter.get(), is_load, is_store, expr->get_node()->get_friendly_name()); \
        return std::make_shared<jit_debug_emitter>(emitter, segfault_emitter); \
    }, \
    [](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> { \
        return e_type::get_supported_precisions(n); \
    } \
}
#endif

#define CREATE_CPU_EMITTER(e_type) { \
[this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> { \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,6 @@ jit_brgemm_copy_b_emitter::jit_brgemm_copy_b_emitter(jit_generator* h, cpu_isa_t
const bool isAMXSupported = mayiuse(avx512_core_amx);
const auto use_amx = isAMXSupported && m_brgemm_prc_in0 != ov::element::f32 && (m_K % m_brgemmVNNIFactor == 0) && (m_N % m_brgemmVNNIFactor == 0);
init_brgemm_copy(m_kernel, leading_dimension, m_N_blk, m_N_tail, m_LDB, m_K - m_K_tail, use_amx, dt_in0, dt_in1);
#ifdef SNIPPETS_DEBUG_CAPS
DebugCapsConfig debugCaps;
if (!debugCaps.snippets_segfault_detector.empty())
segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, false, false, brgemm_repack->get_friendly_name()));
#endif
}

void jit_brgemm_copy_b_emitter::init_brgemm_copy(std::unique_ptr<matmul::jit_brgemm_matmul_copy_b_t>& kernel,
Expand Down Expand Up @@ -112,10 +107,6 @@ void jit_brgemm_copy_b_emitter::init_brgemm_copy(std::unique_ptr<matmul::jit_brg

void jit_brgemm_copy_b_emitter::emit_impl(const std::vector<size_t>& in,
const std::vector<size_t>& out) const {
#ifdef SNIPPETS_DEBUG_CAPS
if (segfault_detector_emitter != nullptr)
segfault_detector_emitter->emit_code(in, out);
#endif
if (host_isa_ == cpu::x64::avx512_core) {
Xbyak::Reg64 src(static_cast<int>(in[0]));
Xbyak::Reg64 dst(static_cast<int>(out[0]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@

#include <cpu/x64/matmul/brgemm_matmul_copy_utils.hpp>

#ifdef SNIPPETS_DEBUG_CAPS
#include "jit_segfault_detector_emitter.hpp"
#endif

namespace ov {
namespace intel_cpu {

Expand Down Expand Up @@ -52,8 +48,7 @@ class jit_brgemm_copy_b_emitter : public jit_emitter {
size_t m_comp_offset = 0lu;

#ifdef SNIPPETS_DEBUG_CAPS
friend class jit_uni_segfault_detector_emitter;
std::shared_ptr<jit_uni_segfault_detector_emitter> segfault_detector_emitter = nullptr;
friend std::string init_info_jit_brgemm_copy_b_emitter(const jit_brgemm_copy_b_emitter *emitter);
#endif
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,6 @@ jit_brgemm_emitter::jit_brgemm_emitter(jit_generator* h, cpu_isa_t isa, const ov
m_store_offset_c = brgemm_node->get_offset_c();
if (m_with_scratch)
m_load_offset_scratch = brgemm_node->get_offset_scratch();

#ifdef SNIPPETS_DEBUG_CAPS
DebugCapsConfig debugCaps;
if (!debugCaps.snippets_segfault_detector.empty())
segfault_detector_emitter.reset(new jit_uni_segfault_detector_emitter(h, isa, this, false, false, brgemm_node->get_friendly_name()));
#endif
}

std::set<std::vector<element::Type>> jit_brgemm_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
Expand Down Expand Up @@ -184,10 +178,6 @@ void jit_brgemm_emitter::validate_arguments(const std::vector<size_t> &in, const

void jit_brgemm_emitter::emit_impl(const std::vector<size_t>& in, const std::vector<size_t>& out) const {
validate_arguments(in, out);
#ifdef SNIPPETS_DEBUG_CAPS
if (segfault_detector_emitter != nullptr)
segfault_detector_emitter->emit_code(in, out);
#endif
if (host_isa_ == cpu::x64::avx512_core) {
Xbyak::Reg64 input_0(static_cast<int>(in[0]));
Xbyak::Reg64 input_1(static_cast<int>(in[1]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@

#include <cpu/x64/brgemm/brgemm.hpp>

#ifdef SNIPPETS_DEBUG_CAPS
#include "jit_segfault_detector_emitter.hpp"
#endif

namespace ov {
namespace intel_cpu {

Expand Down Expand Up @@ -60,8 +56,7 @@ class jit_brgemm_emitter : public jit_emitter {
std::vector<size_t> io_data_size {};

#ifdef SNIPPETS_DEBUG_CAPS
friend class jit_uni_segfault_detector_emitter;
std::shared_ptr<jit_uni_segfault_detector_emitter> segfault_detector_emitter = nullptr;
friend std::string init_info_jit_brgemm_emitter(const jit_brgemm_emitter *emitter);
#endif
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#ifdef SNIPPETS_DEBUG_CAPS

#include "jit_debug_emitter.hpp"
#include <vector>
#include "utils/general_utils.h"

using namespace dnnl::impl::cpu;
using namespace dnnl::impl;
using namespace Xbyak;

namespace ov {
namespace intel_cpu {

// jit_debug_emitter member definitions.
// Apart from emit_code() and the static get_supported_precisions(), every method below
// simply forwards the call (and its arguments) to m_target_emitter.

// Forwards to m_target_emitter->get_inputs_num().
size_t jit_debug_emitter::get_inputs_num() const {
return m_target_emitter->get_inputs_num();
}

// Forwards to m_target_emitter->get_max_vecs_count().
size_t jit_debug_emitter::get_max_vecs_count() const {
return m_target_emitter->get_max_vecs_count();
}

// Forwards to m_target_emitter->get_vec_length().
size_t jit_debug_emitter::get_vec_length() const {
return m_target_emitter->get_vec_length();
}

// Forwards to m_target_emitter->push_vec() with the same address and vector index.
void jit_debug_emitter::push_vec(const Xbyak::Address &addr, size_t vec_idx) const {
m_target_emitter->push_vec(addr, vec_idx);
}

// Forwards to m_target_emitter->pop_vec() with the same vector index and address.
void jit_debug_emitter::pop_vec(size_t vec_idx, const Xbyak::Address &addr) const {
m_target_emitter->pop_vec(vec_idx, addr);
}

// Forwards to m_target_emitter->aux_vecs_count().
size_t jit_debug_emitter::aux_vecs_count() const {
return m_target_emitter->aux_vecs_count();
}

// Forwards to m_target_emitter->get_in_out_type().
emitter_in_out_map jit_debug_emitter::get_in_out_type() const {
return m_target_emitter->get_in_out_type();
}

// Forwards to m_target_emitter->aux_gprs_count().
size_t jit_debug_emitter::aux_gprs_count() const {
return m_target_emitter->aux_gprs_count();
}

// Returns jit_emitter::get_supported_precisions(node); m_target_emitter is not consulted here.
std::set<std::vector<element::Type>> jit_debug_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return jit_emitter::get_supported_precisions(node);
}

// Forwards to m_target_emitter->emitter_preamble() with the same index lists.
void jit_debug_emitter::emitter_preamble(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) const {
m_target_emitter->emitter_preamble(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs);
}

// Forwards to m_target_emitter->emitter_postamble().
void jit_debug_emitter::emitter_postamble() const {
m_target_emitter->emitter_postamble();
}

// Forwards to m_target_emitter->emit_data().
void jit_debug_emitter::emit_data() const {
m_target_emitter->emit_data();
}

// Forwards to m_target_emitter->prepare_table().
void jit_debug_emitter::prepare_table() {
m_target_emitter->prepare_table();
}

// Emits code in two steps, passing identical index lists to both emitters:
// first m_decorator_emitter->emit_code(), then m_target_emitter->emit_code().
void jit_debug_emitter::emit_code(const std::vector<size_t> &in_idxs, const std::vector<size_t> &out_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) const {
// the decorator is always emitted ahead of the target emitter
m_decorator_emitter->emit_code(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs);

m_target_emitter->emit_code(in_idxs, out_idxs, pool_vec_idxs, pool_gpr_idxs);
}

// Forwards to m_target_emitter->internal_call_preamble().
void jit_debug_emitter::internal_call_preamble() const {
m_target_emitter->internal_call_preamble();
}

// Forwards to m_target_emitter->internal_call_postamble().
void jit_debug_emitter::internal_call_postamble() const {
m_target_emitter->internal_call_postamble();
}

// Forwards to m_target_emitter->internal_call_rsp_align().
void jit_debug_emitter::internal_call_rsp_align() const {
m_target_emitter->internal_call_rsp_align();
}

// Forwards to m_target_emitter->internal_call_rsp_restore().
void jit_debug_emitter::internal_call_rsp_restore() const {
m_target_emitter->internal_call_rsp_restore();
}

} // namespace intel_cpu
} // namespace ov

#endif
Loading

0 comments on commit 6973772

Please sign in to comment.