Skip to content

Commit

Permalink
[CPU][ARM]: Implemented CPU plugin just-in-time emitter for Negative …
Browse files Browse the repository at this point in the history
…operation (openvinotoolkit#28258)

### Details:
 - Added JIT emitter for Eltwise Negation operation on ARM64 SIMD
 - Implemented fp32 optimization replacing C++ Math implementation
 - Modified ARM64 executor to support new JIT emitter
 - Updated kernel files to include Negation in Eltwise operations
 - Added test coverage for JIT implementation verification
 - Transitioned the operation type from Math to Eltwise for better performance

### Tickets:
 - openvinotoolkit#27500
 

![image](https://github.com/user-attachments/assets/2dd781da-94c7-4edc-abbb-e7a048d00944)
  • Loading branch information
xyz-harshal authored and MirceaDan99 committed Jan 22, 2025
1 parent c421a52 commit 2295ee2
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/cpu_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,12 @@ static const TypeToNameMap& get_type_to_name_tbl() {
{"Atanh", Type::Math},
{"Ceil", Type::Math},
{"Ceiling", Type::Eltwise},
{"Negative", Type::Eltwise},
{"Cos", Type::Math},
{"Cosh", Type::Math},
{"Floor", Type::Eltwise},
{"HardSigmoid", Type::Math},
{"If", Type::If},
{"Neg", Type::Math},
{"Reciprocal", Type::Math},
{"Selu", Type::Math},
{"Sign", Type::Math},
Expand Down Expand Up @@ -425,6 +425,7 @@ std::string algToString(const Algorithm alg) {
CASE(EltwiseFloor);
CASE(EltwiseCeiling);
CASE(EltwiseFloorMod);
CASE(EltwiseNegative);
CASE(EltwiseMod);
CASE(EltwiseMaximum);
CASE(EltwiseMinimum);
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/cpu_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ enum class Algorithm {
EltwiseFloor,
EltwiseCeiling,
EltwiseFloorMod,
EltwiseNegative,
EltwiseMod,
EltwiseMaximum,
EltwiseMinimum,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,46 @@ std::set<std::vector<element::Type>> jit_ceiling_emitter::get_supported_precisio
return {{element::f32}};
}

/// NEGATIVE ///
// Node-based constructor.
// The execution precision forwarded to the jit_emitter base is whatever
// get_arithmetic_binary_exec_precision() returns for `node`.
jit_negative_emitter::jit_negative_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
                                           dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
                                           const std::shared_ptr<ov::Node>& node)
    : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {
}

// Precision-based constructor.
// No ov::Node is involved here: the caller-supplied `exec_prc` is passed
// straight through to the jit_emitter base together with the host and ISA.
jit_negative_emitter::jit_negative_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
                                           dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
                                           const ov::element::Type exec_prc)
    : jit_emitter(host, host_isa, exec_prc) {
}

// Reports how many inputs this emitter consumes: always one.
size_t jit_negative_emitter::get_inputs_count() const {
    constexpr size_t inputs_count = 1;
    return inputs_count;
}

// Dispatches code emission according to the host ISA.
// Only `asimd` is handled; any other ISA value raises via OV_CPU_JIT_EMITTER_THROW.
void jit_negative_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
                                     const std::vector<size_t>& out_vec_idxs) const {
    constexpr auto supported_isa = dnnl::impl::cpu::aarch64::asimd;

    // Reject unsupported ISAs up front so the happy path is not nested.
    if (host_isa_ != supported_isa) {
        OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel");
    }

    emit_isa<supported_isa>(in_vec_idxs, out_vec_idxs);
}

template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
void jit_negative_emitter::emit_isa(const std::vector<size_t>& in_vec_idxs,
const std::vector<size_t>& out_vec_idxs) const {
OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string());

using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::TReg;
TReg src = TReg(in_vec_idxs[0]);
TReg dst = TReg(out_vec_idxs[0]);
h->fneg(dst.s, src.s);
}

// Returns the set of supported input-precision combinations.
// The result holds exactly one combination: a single f32 input.
// `node` is not consulted.
std::set<std::vector<element::Type>> jit_negative_emitter::get_supported_precisions(
    const std::shared_ptr<ov::Node>& node) {
    const std::vector<element::Type> f32_only{element::f32};

    std::set<std::vector<element::Type>> supported;
    supported.insert(f32_only);
    return supported;
}

/// GELU_ERF ///
jit_gelu_erf_emitter::jit_gelu_erf_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,28 @@ class jit_ceiling_emitter : public jit_emitter {
void emit_isa(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const;
};

/// JIT emitter for the element-wise Negative operation (aarch64).
class jit_negative_emitter : public jit_emitter {
public:
    /// Constructs the emitter with an explicit execution precision (f32 by default).
    jit_negative_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
                         dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
                         const ov::element::Type exec_prc = ov::element::f32);

    /// Constructs the emitter from an ov::Node.
    jit_negative_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
                         dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
                         const std::shared_ptr<ov::Node>& node);

    /// Number of inputs the emitter consumes.
    size_t get_inputs_count() const override;

    /// Supported input-precision combinations; `node` is optional and defaults to nullptr.
    static std::set<std::vector<element::Type>> get_supported_precisions(
        const std::shared_ptr<ov::Node>& node = nullptr);

private:
    /// Entry point invoked through the jit_emitter interface; forwards to emit_isa.
    void emit_impl(const std::vector<size_t>& in_vec_idxs,
                   const std::vector<size_t>& out_vec_idxs) const override;

    /// ISA-specific code emission.
    template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t>& in_vec_idxs,
                  const std::vector<size_t>& out_vec_idxs) const;
};

class jit_gelu_erf_emitter : public jit_emitter {
public:
jit_gelu_erf_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1999,6 +1999,10 @@ jit_negative_emitter::jit_negative_emitter(x64::jit_generator* host,
const std::shared_ptr<ov::Node>& node,
ov::element::Type exec_prc)
: jit_emitter(host, host_isa, exec_prc) {}
// x64 constructor overload that takes no ov::Node: host, ISA and the execution
// precision are forwarded unchanged to the jit_emitter base.
jit_negative_emitter::jit_negative_emitter(x64::jit_generator* host,
                                           x64::cpu_isa_t host_isa,
                                           ov::element::Type exec_prc)
    : jit_emitter(host, host_isa, exec_prc) {
}

size_t jit_negative_emitter::get_inputs_num() const {
return 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,9 @@ class jit_sqrt_emitter : public jit_emitter {

class jit_negative_emitter : public jit_emitter {
public:
jit_negative_emitter(dnnl::impl::cpu::x64::jit_generator* host,
dnnl::impl::cpu::x64::cpu_isa_t host_isa,
ov::element::Type exec_prc = ov::element::f32);
jit_negative_emitter(dnnl::impl::cpu::x64::jit_generator* host,
dnnl::impl::cpu::x64::cpu_isa_t host_isa,
const std::shared_ptr<ov::Node>& n,
Expand All @@ -609,7 +612,7 @@ class jit_negative_emitter : public jit_emitter {
const std::shared_ptr<ov::Node>& node = nullptr);

private:
void emit_impl(const std::vector<size_t>& in, const std::vector<size_t>& out) const override;
void emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const override;

template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void emit_isa(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const;
Expand Down
9 changes: 9 additions & 0 deletions src/plugins/intel_cpu/src/nodes/eltwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter),
OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter),
OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter),
OV_CASE(Algorithm::EltwiseNegative, jit_negative_emitter),
OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter),
OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter),
OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter),
Expand Down Expand Up @@ -664,6 +665,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter),
OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter),
OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter),
OV_CASE(Algorithm::EltwiseNegative, jit_negative_emitter),
OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter),
OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter),
OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter),
Expand Down Expand Up @@ -1149,6 +1151,9 @@ const std::map<const ov::DiscreteTypeInfo, Eltwise::Initializer>& Eltwise::getIn
{ov::op::v0::Ceiling::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
node.algorithm = Algorithm::EltwiseCeiling;
}},
{ov::op::v0::Negative::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
node.algorithm = Algorithm::EltwiseNegative;
}},
{ov::op::v0::Floor::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
node.algorithm = Algorithm::EltwiseFloor;
}},
Expand Down Expand Up @@ -1979,6 +1984,9 @@ class EltwiseRefExecutor : public EltwiseRefBaseExecutor<T> {
case Algorithm::EltwiseFloor:
*dst_ptr_f = floorf(src_f[0]);
break;
case Algorithm::EltwiseNegative:
*dst_ptr_f = -src_f[0];
break;
case Algorithm::EltwiseFloorMod:
*dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1];
break;
Expand Down Expand Up @@ -2230,6 +2238,7 @@ size_t Eltwise::getOpInputsNum() const {
case Algorithm::EltwiseGeluErf:
case Algorithm::EltwiseGeluTanh:
case Algorithm::EltwiseCeiling:
case Algorithm::EltwiseNegative:
case Algorithm::EltwiseFloor:
case Algorithm::EltwiseElu:
case Algorithm::EltwiseTanh:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ bool JitEltwiseExecutor::isSupported(const Algorithm& algorithm,
Algorithm::EltwiseFloor,
Algorithm::EltwiseFloorMod,
Algorithm::EltwiseCeiling,
Algorithm::EltwiseNegative,
Algorithm::EltwiseGeluErf,
Algorithm::EltwiseGeluTanh,
Algorithm::EltwiseGreater,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ std::shared_ptr<jit_emitter> jit_uni_eltwise_generic<isa>::create_eltwise_emitte
OV_CASE(Algorithm::EltwiseFloor, ov::intel_cpu::aarch64::jit_floor_emitter),
OV_CASE(Algorithm::EltwiseFloorMod, ov::intel_cpu::aarch64::jit_floor_mod_emitter),
OV_CASE(Algorithm::EltwiseCeiling, ov::intel_cpu::aarch64::jit_ceiling_emitter),
OV_CASE(Algorithm::EltwiseNegative, ov::intel_cpu::aarch64::jit_negative_emitter),
OV_CASE(Algorithm::EltwiseHswish, ov::intel_cpu::aarch64::jit_hswish_emitter),
OV_CASE(Algorithm::EltwiseIsFinite, ov::intel_cpu::aarch64::jit_is_finite_emitter),
OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter),
Expand Down Expand Up @@ -851,6 +852,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter),
OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter),
OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter),
OV_CASE(Algorithm::EltwiseNegative, jit_negative_emitter),
OV_CASE(Algorithm::EltwiseGeluErf, jit_gelu_erf_emitter),
OV_CASE(Algorithm::EltwiseGeluTanh, jit_gelu_tanh_emitter),
OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType
(activation_type == utils::ActivationTypes::Exp) ||
(activation_type == utils::ActivationTypes::Floor) ||
(activation_type == utils::ActivationTypes::Ceiling) ||
(activation_type == utils::ActivationTypes::Negative) ||
(activation_type == utils::ActivationTypes::HSwish) ||
(activation_type == utils::ActivationTypes::IsInf) ||
(activation_type == utils::ActivationTypes::HardSigmoid) ||
Expand Down Expand Up @@ -210,6 +211,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType
#endif
if ((activation_type == utils::ActivationTypes::Floor) ||
(activation_type == utils::ActivationTypes::Ceiling) ||
(activation_type == utils::ActivationTypes::Negative) ||
(activation_type == utils::ActivationTypes::IsNaN) ||
(activation_type == utils::ActivationTypes::IsFinite) ||
(activation_type == utils::ActivationTypes::RoundHalfAwayFromZero) ||
Expand Down Expand Up @@ -253,6 +255,7 @@ const std::map<utils::ActivationTypes, std::vector<std::vector<float>>>& activat
{Elu, {{0.1f}}},
{Floor, {{}}},
{Ceiling, {{}}},
{Negative, {{}}},
{Swish, {{0.1f}}},
{HSwish, {{}}},
{PReLu, {{-0.01f}}},
Expand Down

0 comments on commit 2295ee2

Please sign in to comment.