From 8ee6f407a9993db0c1d8d9347531bec013d53703 Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Mon, 23 Dec 2024 17:29:06 +0900 Subject: [PATCH 1/8] [GPU] Implement fake_convert (#28065) ### Details: - *implement fake_convert* - *add functional test for fake convert* ### Tickets: - *159263* --- src/core/include/openvino/op/fake_convert.hpp | 1 + src/core/src/op/fake_convert.cpp | 4 + .../intel_gpu/plugin/primitives_list.hpp | 1 + .../intel_gpu/primitives/fake_convert.hpp | 68 +++++++++ .../intel_gpu/src/graph/fake_convert.cpp | 72 +++++++++ .../src/graph/impls/cpu/fake_convert.cpp | 131 ++++++++++++++++ .../src/graph/impls/cpu/register.cpp | 1 + .../src/graph/impls/cpu/register.hpp | 1 + .../impls/registry/fake_convert_impls.cpp | 24 +++ .../src/graph/impls/registry/registry.hpp | 1 + .../src/graph/include/fake_convert_inst.h | 55 +++++++ .../intel_gpu/src/plugin/ops/fake_convert.cpp | 39 +++++ .../single_layer_tests/fake_convert.cpp | 141 ++++++++++++++++++ .../unit/module_tests/impls_registry_test.cpp | 4 +- 14 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp create mode 100644 src/plugins/intel_gpu/src/graph/fake_convert.cpp create mode 100644 src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp create mode 100644 src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp create mode 100644 src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h create mode 100644 src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp create mode 100644 src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp diff --git a/src/core/include/openvino/op/fake_convert.hpp b/src/core/include/openvino/op/fake_convert.hpp index c3eaa43b98a51b..16ef7a0337c15b 100644 --- a/src/core/include/openvino/op/fake_convert.hpp +++ b/src/core/include/openvino/op/fake_convert.hpp @@ -68,6 +68,7 @@ class OPENVINO_API FakeConvert : public Op { bool has_evaluate() const override; std::string get_destination_type() const; + void set_destination_type(ov::element::Type destination_type); const ov::element::Type& get_destination_element_type() const; private: diff --git a/src/core/src/op/fake_convert.cpp b/src/core/src/op/fake_convert.cpp index 5b3c8f8d8e9938..517674402ef872 100644 --- a/src/core/src/op/fake_convert.cpp +++ b/src/core/src/op/fake_convert.cpp @@ -79,6 +79,10 @@ std::string FakeConvert::get_destination_type() const { return m_destination_type.get_type_name(); } +void FakeConvert::set_destination_type(ov::element::Type destination_type) { + m_destination_type = destination_type; +} + const ov::element::Type& FakeConvert::get_destination_element_type() const { return m_destination_type; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index c7524f1880157d..0950614897ab43 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -267,6 +267,7 @@ REGISTER_FACTORY(v13, ScaledDotProductAttention); REGISTER_FACTORY(v13, BitwiseAnd); REGISTER_FACTORY(v13, BitwiseOr); REGISTER_FACTORY(v13, BitwiseXor); +REGISTER_FACTORY(v13, FakeConvert); // ------------------------------ Supported v15 ops ----------------------------- // REGISTER_FACTORY(v15, ROIAlignRotated); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp 
b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp new file mode 100644 index 00000000000000..c16af0be51abda --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "primitive.hpp" +#include + +namespace cldnn { + +/// @brief FakeConvert performs element-wise quantization of input values +/// into a set of values corresponding to a target low-precision type. +struct fake_convert : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(fake_convert) + + fake_convert() : primitive_base("", {}) {} + + /// @brief Constructs fake_convert primitive. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param scale Scale primitive id. + /// @param shift Shift primitive id. + /// @param destination_type The low precision type to be emulated. + fake_convert(const primitive_id& id, + const input_info& input, + const input_info& scale, + const input_info& shift, + ov::element::Type destination_type = ov::element::Type_t::f8e4m3) + : primitive_base(id, {input, scale, shift}, 1), destination_type(destination_type) {} + + /// @brief Constructs fake_convert primitive. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param scale Scale primitive id. + /// @param shift Shift primitive id. + /// @param destination_type The low precision type to be emulated. + fake_convert(const primitive_id& id, + const input_info& input, + const input_info& scale, + ov::element::Type destination_type = ov::element::Type_t::f8e4m3) + : primitive_base(id, {input, scale}, 1), destination_type(destination_type) {} + + ov::element::Type destination_type; + + size_t hash() const override { + size_t seed = primitive::hash(); + seed = hash_combine(seed, destination_type.get_type_name()); + return seed; + } + + bool operator==(const primitive& rhs) const override { + if (!compare_common_params(rhs)) + return false; + auto rhs_casted = downcast(rhs); + return (destination_type == rhs_casted.destination_type); + } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << make_data(&destination_type, sizeof(destination_type)); + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> make_data(&destination_type, sizeof(destination_type)); + } +}; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/fake_convert.cpp new file mode 100644 index 00000000000000..b201378d52cc8d --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/fake_convert.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fake_convert_inst.h" +#include "fake_convert_shape_inference.hpp" + +#include "primitive_type_base.h" +#include "intel_gpu/runtime/error_handler.hpp" +#include "json_object.h" +#include + +namespace cldnn { +GPU_DEFINE_PRIMITIVE_TYPE_ID(fake_convert) + +layout fake_convert_inst::calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param) { + return calc_output_layouts(node, impl_param)[0]; +} + +template +std::vector fake_convert_inst::calc_output_layouts(fake_convert_node const& node, kernel_impl_params const& impl_param) { + const auto& input_layout = impl_param.get_input_layout(0); + auto output_type = ov::element::Type(input_layout.data_type); + + OPENVINO_ASSERT(ov::element::Type::merge(output_type, 
output_type, ov::element::Type(impl_param.get_input_layout(1).data_type)), + "Mixed input types are not supported."); + + if (impl_param.input_layouts.size() == 3) { + OPENVINO_ASSERT(ov::element::Type::merge(output_type, output_type, ov::element::Type(impl_param.get_input_layout(2).data_type)), + "Mixed input types are not supported."); + } + + switch (output_type) { + case ov::element::bf16: + case ov::element::f16: + case ov::element::f32: + break; + default: + OPENVINO_THROW("The output data type should be a bf16, f16, f32 but got: ", output_type); + } + + return { layout{input_layout.get_partial_shape(), output_type, input_layout.format} }; +} + +template std::vector fake_convert_inst::calc_output_layouts(fake_convert_node const& node, const kernel_impl_params& impl_param); + +std::string fake_convert_inst::to_string(fake_convert_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + auto& input = node.input(); + auto& scale = node.scale(); + + std::stringstream primitive_description; + + json_composite fake_convert_info; + fake_convert_info.add("input id", input.id()); + fake_convert_info.add("scale id", scale.id()); + if (node.has_shift()) { + fake_convert_info.add("shift id", node.shift().id()); + } + fake_convert_info.add("destination_type", node.get_destination_type().get_type_name()); + + node_info->add("fake_convert info", fake_convert_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +fake_convert_inst::typed_primitive_inst(network& network, fake_convert_node const& node) + : parent(network, node) {} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp new file mode 100644 index 00000000000000..a5f94741c40bf5 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "impls/cpu/cpu_impl_helpers.hpp" +#include "register.hpp" +#include "fake_convert_inst.h" +#include "impls/registry/implementation_map.hpp" + +#include "openvino/op/fake_convert.hpp" + +namespace cldnn { +namespace cpu { + +struct fake_convert_impl : public typed_primitive_impl { + using parent = typed_primitive_impl; + using parent::parent; + + ov::element::Type destination_type; + + std::shared_ptr op; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::fake_convert_impl) + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + fake_convert_impl() : parent("fake_convert_cpu_impl") {} + + explicit fake_convert_impl(const fake_convert_node& outer) { + set_node_params(outer); + } + + void set_node_params(const program_node& arg) override { + OPENVINO_ASSERT(arg.is_type(), "[GPU] Incorrect program_node type"); + const auto& node = arg.as(); + destination_type = node.get_destination_type(); + } + + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << make_data(&destination_type, sizeof(destination_type)); + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> make_data(&destination_type, sizeof(destination_type)); + } + + event::ptr execute_impl(const std::vector& events, fake_convert_inst& instance) override { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "fake_convert::execute_impl"); + auto& stream = instance.get_network().get_stream(); + + const bool pass_through_events = (stream.get_queue_type() == 
QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); + + if (!pass_through_events) { + stream.wait_for_events(events); + } + + auto params = instance.get_impl_params(); + + ov::TensorVector input_host_tensors; + ov::TensorVector output_host_tensors; + + if (!op) { + op = std::make_shared(); + op->set_destination_type(destination_type); + } + + std::vector input_mem_ptrs; + for (size_t i = 0; i < instance.dependencies().size(); i++) + input_mem_ptrs.push_back(instance.dep_memory_ptr(i)); + + auto output_mem_ptr = instance.output_memory_ptr(); + + cldnn::mem_lock output_lock(output_mem_ptr, stream); + + for (size_t i = 0; i < input_mem_ptrs.size(); i++) + input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); + + output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data())); + + OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors), + "[GPU] Couldn't execute fake_convert primitive with id ", instance.id()); + + if (pass_through_events) { + return stream.group_events(events); + } + + return make_output_event(stream, instance.is_output()); + } + + void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} + + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} + +public: + static std::unique_ptr create(const fake_convert_node& arg, const kernel_impl_params& impl_param) { + return make_unique(); + } +}; + + +namespace detail { + +attach_fake_convert_impl::attach_fake_convert_impl() { + auto formats = { + format::bfyx, + format::bfzyx, + format::bfwzyx, + format::bfuwzyx, + format::bfvuwzyx, + }; + + auto types = { + data_types::f32, + data_types::f16, + data_types::bf16 + }; + + implementation_map::add(impl_types::cpu, shape_types::static_shape, fake_convert_impl::create, types, formats); + implementation_map::add(impl_types::cpu, shape_types::dynamic_shape, fake_convert_impl::create, types, formats); +} + +} // namespace detail +} // namespace cpu +} // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::fake_convert_impl) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::fake_convert) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp index 2b0dc5b212158c..e86628444de439 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp @@ -31,6 +31,7 @@ void register_implementations() { REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); + REGISTER_CPU(fake_convert); } } // namespace cpu diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp index cb89eae29d8c56..15cc4b11c077eb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp @@ -56,6 +56,7 @@ REGISTER_CPU(broadcast); REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); +REGISTER_CPU(fake_convert); #undef REGISTER_CPU diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp new file mode 100644 index 00000000000000..991ab5aa12657a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp @@ -0,0 +1,24 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "registry.hpp" +#include 
"intel_gpu/primitives/fake_convert.hpp" +#include "primitive_inst.h" + +namespace ov { +namespace intel_gpu { + +using namespace cldnn; + +const std::vector>& Registry::get_implementations() { + static const std::vector> impls = { + OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::static_shape) + OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::dynamic_shape) + }; + + return impls; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp index f45d0897f01363..b2778233f41e64 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp @@ -145,6 +145,7 @@ REGISTER_IMPLS(scatter_elements_update); REGISTER_IMPLS(shape_of); REGISTER_IMPLS(strided_slice); REGISTER_IMPLS(tile); +REGISTER_IMPLS(fake_convert); REGISTER_DEFAULT_IMPLS(assign, CPU_S, CPU_D); REGISTER_DEFAULT_IMPLS(read_value, CPU_S, CPU_D); diff --git a/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h b/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h new file mode 100644 index 00000000000000..d86c565a5e6b2e --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h @@ -0,0 +1,55 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "intel_gpu/primitives/fake_convert.hpp" +#include "primitive_inst.h" + +#include +#include + +namespace cldnn { + +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + typed_program_node(const std::shared_ptr prim, program& prog) + : parent(prim, prog), destination_type(prim->destination_type) { + support_padding_all(true); + } + +public: + using parent::parent; + + program_node& input() const { return get_dependency(0); } + program_node& scale() const { return get_dependency(1); } + program_node& shift() const { return get_dependency(2); } + bool has_shift() const { return (get_dependencies().size() == 3); } + + ov::element::Type get_destination_type() const { return destination_type; } + + std::vector get_shape_infer_dependencies() const override { return {}; } + +private: + ov::element::Type destination_type; +}; + +using fake_convert_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + +public: + template + static std::vector calc_output_layouts(fake_convert_node const& /*node*/, const kernel_impl_params& impl_param); + static layout calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param); + static std::string to_string(fake_convert_node const& node); + + typed_primitive_inst(network& network, fake_convert_node const& node); +}; + +using fake_convert_inst = typed_primitive_inst; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp new file mode 100644 index 00000000000000..282a483deab189 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/plugin/common_utils.hpp" + +#include "openvino/op/fake_convert.hpp" + +#include "intel_gpu/primitives/fake_convert.hpp" + +namespace ov { +namespace intel_gpu { 
+static void CreateFakeConvertOp(ProgramBuilder& p, const std::shared_ptr& op) { + validate_inputs_count(op, {2, 3}); + const auto inputs = p.GetInputInfo(op); + const std::string layerName = layer_type_name_ID(op); + ov::element::Type destination_type = op->get_destination_element_type(); + std::shared_ptr fake_convert_prim = nullptr; + if (inputs.size() == 2) { + fake_convert_prim = std::make_shared(layerName, + inputs[0], + inputs[1], + destination_type); + } else { + fake_convert_prim = std::make_shared(layerName, + inputs[0], + inputs[1], + inputs[2], + destination_type); + } + + p.add_primitive(*op, fake_convert_prim); +} + +REGISTER_FACTORY_IMPL(v13, FakeConvert); + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp new file mode 100644 index 00000000000000..d1236f5c524421 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp @@ -0,0 +1,141 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/file_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/fake_convert.hpp" + +namespace { + +namespace fp8 { +constexpr float MAX_F8E4M3 = 448.f; +constexpr float MAX_F8E5M2 = 57344.f; +} // namespace fp8 + +using namespace std; +using namespace ov; +using namespace testing; +using ov::test::InputShape; + +using FakeConvertTestParams = std::tuple< + ov::Shape, // Input shapes + ov::Shape, // Scale shape + ov::Shape, // Shift shape + ov::element::Type, // input precision + ov::element::Type, // destination type + std::string >; // device name + +class FakeConvertTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + ov::Shape input_shape; + ov::Shape scale_shape; + ov::Shape shift_shape; + ov::element::Type prec; + ov::element::Type destination_type; + std::string target_device; + + std::tie(input_shape, scale_shape, shift_shape, prec, destination_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + result << ov::test::utils::vec2str(input_shape) << "_"; + result << "scale_shape=" << ov::test::utils::vec2str(scale_shape) << "_"; + result << "shift_shape=" << ov::test::utils::vec2str(shift_shape) << "_"; + result << "input_precision=" << prec << "_"; + result << "destination_type=" << destination_type << "_"; + result << "device_type=" << target_device; + return result.str(); + } + +protected: + ov::Shape input_shape, scale_shape, shift_shape; + ov::element::Type destination_type; + + void SetUp() override { + ov::element::Type prec; + std::tie(input_shape, scale_shape, shift_shape, prec, destination_type, targetDevice) = GetParam(); + const float MAX_FP8 = (destination_type == ov::element::f8e4m3) ? 
fp8::MAX_F8E4M3 : fp8::MAX_F8E5M2; + if (shift_shape.empty()) { + auto data = make_shared(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {MAX_FP8 / (MAX_FP8 / 2.f), + 1.0f, + MAX_FP8 / (MAX_FP8 * 3.5f), + MAX_FP8 / (MAX_FP8 * 4.f)}); + + auto op = make_shared(data, scale, destination_type); + + function = make_shared(OutputVector{op}, ParameterVector{data}); + } else { + auto data = make_shared(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {MAX_FP8 / (MAX_FP8 / 2.f), + 1.0f, + MAX_FP8 / (MAX_FP8 * 3.5f), + MAX_FP8 / (MAX_FP8 * 4.f)}); + auto shift = op::v0::Constant::create(prec, shift_shape, {0.f, 0.f, 0.f, 0.f}); + + auto op = make_shared(data, scale, shift, destination_type); + + function = make_shared(OutputVector{op}, ParameterVector{data}); + } + } + + void generate_inputs(const std::vector& target_shapes) override { + inputs.clear(); + const float MAX_FP8 = (destination_type == ov::element::f8e4m3) ? fp8::MAX_F8E4M3 : fp8::MAX_F8E5M2; + const auto& func_inputs = function->inputs(); + auto& data_input = func_inputs[0]; + ov::Tensor tensor = ov::Tensor(data_input.get_element_type(), target_shapes[0]); + std::vector input_data{MAX_FP8 / 4.f, + MAX_FP8 / 3.f, + MAX_FP8 / 2.f, + MAX_FP8, + MAX_FP8, + MAX_FP8, + MAX_FP8 * 1.2f, + MAX_FP8 * 2.3f, + MAX_FP8 * 3.4f, + MAX_FP8 * 2.f, + MAX_FP8 * 3.f, + MAX_FP8 * 4.f}; + auto* data_ptr = tensor.data(); + for (size_t i = 0; i < input_data.size(); i++) { + data_ptr[i] = input_data[i]; + } + inputs.insert({data_input.get_node_shared_ptr(), tensor}); + } +}; + +TEST_P(FakeConvertTest, Inference) { + run(); +} + +const std::vector input_precisions = {ov::element::f32}; + +const std::vector input_shapes = {{4, 3}}; + +const ov::Shape scale_shape = {4, 1}; +const std::vector shift_shapes = {{4, 1}, {}}; +const std::vector destination_types = {ov::element::f8e4m3, ov::element::f8e5m2}; + +INSTANTIATE_TEST_SUITE_P(Smoke_FakeConvertTest, + FakeConvertTest, + ::testing::Combine(::testing::ValuesIn(input_shapes), + ::testing::Values(scale_shape), + ::testing::ValuesIn(shift_shapes), + ::testing::ValuesIn(input_precisions), + ::testing::ValuesIn(destination_types), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + FakeConvertTest::getTestCaseName); +} // namespace diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp index a16cd20846a1c7..5dfc450e43905a 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp @@ -85,6 +85,7 @@ #include "intel_gpu/primitives/swiglu.hpp" #include "intel_gpu/primitives/tile.hpp" #include "intel_gpu/primitives/unique.hpp" +#include "intel_gpu/primitives/fake_convert.hpp" #include "primitive_inst.h" #include "test_utils.h" @@ -226,5 +227,6 @@ TEST(registry_test, no_null_impls) { cldnn::unique_count, cldnn::unique_gather, cldnn::scaled_dot_product_attention, - cldnn::rope>(); + cldnn::rope, + cldnn::fake_convert>(); } From 5fc16c8bf3a0693d6aafd1b5ce7a2bf050db1a36 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 23 Dec 2024 14:33:39 +0400 Subject: [PATCH 2/8] [PT FE][DOCS] Document conversion of PyTorch models from disk (#28175) **Details:** Document conversion of PyTorch models from disk **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- .../convert-model-pytorch.rst | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git 
a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst index 6ac806daf0cda0..62cfdf05f2b11f 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst @@ -203,6 +203,52 @@ Here is an example of how to convert a model obtained with ``torch.export``: This is an experimental feature. Use it only if you know that you need to. PyTorch version 2.2 is recommended. Dynamic shapes are not supported yet. +Converting a PyTorch Model from Disk +#################################### + +PyTorch provides the capability to save models in two distinct formats: ``torch.jit.ScriptModule`` and ``torch.export.ExportedProgram``. +Both formats can be saved to disk as standalone files, enabling them to be reloaded independently of the original Python code. + +ExportedProgram Format +++++++++++++++++++++++ + +The ``ExportedProgram`` format is saved on disk using `torch.export.save() `__. +Below is an example of how to convert an ``ExportedProgram`` from disk: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model('exported_program.pt2') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc exported_program.pt2 + +ScriptModule Format ++++++++++++++++++++ + +`torch.jit.save() `__ serializes ``ScriptModule`` object on disk. +To convert the serialized ``ScriptModule`` format, run ``convert_model`` function with ``example_input`` parameter as follows: + +.. code-block:: py + :force: + + from openvino import convert_model + import torch + + convert_model(input_model='script_module.pt', example_input=torch.rand(1, 10)) + +``example_input`` is the required parameter for the conversion because ``torch.jit.ScriptModule`` object is always saved in an untraced state on disk. + Exporting a PyTorch Model to ONNX Format ######################################## From b0ff7090a305f94d6ec86f7b60d1833d0dc87be5 Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Mon, 23 Dec 2024 19:12:34 +0800 Subject: [PATCH 3/8] fix coverity scan issue 1568450 (#28139) ### Details: - *fix below coverity scan issue* *** CID 1568450: Concurrent data access violations (MISSING_LOCK) /openvino/src/inference/src/os/lin/lin_system_conf.cpp: 225 in ov::CPU::CPU()::[lambda() (instance 2)]::operator ()() const() 219 return -1; 220 } else if (valid_cpu_mapping_table.size() == (unsigned)_processors) { 221 return 0; 222 } else { 223 _processors = valid_cpu_mapping_table.size(); 224 _cpu_mapping_table.swap(valid_cpu_mapping_table); >>> CID 1568450: Concurrent data access violations (MISSING_LOCK) >>> Accessing "this->this->_proc_type_table" without holding lock "ov::CPU._cpu_mutex". Elsewhere, "ov::CPU._proc_type_table" is written to with "CPU._cpu_mutex" held 2 out of 3 times. 
225 update_valid_processor_linux(std::move(phy_core_list), 226 _numa_nodes, 227 _cores, 228 _proc_type_table, 229 _cpu_mapping_table); 230 return 0; ### Tickets: - *CID 1568450* --- src/inference/src/os/lin/lin_system_conf.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index f8bd16173b8fce..9b6247c6691814 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -188,6 +188,7 @@ CPU::CPU() { } else if (valid_cpu_mapping_table.size() == (unsigned)_processors) { return 0; } else { + std::lock_guard lock{_cpu_mutex}; _processors = valid_cpu_mapping_table.size(); _cpu_mapping_table.swap(valid_cpu_mapping_table); update_valid_processor_linux(std::move(phy_core_list), From 80115574aeebd79e2bec4050b702076c33deee23 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Mon, 23 Dec 2024 15:40:14 +0000 Subject: [PATCH 4/8] [NPUW] Extend NPUW_DQ to work with NF4 for CW models (#28125) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp index 5abe4b39fd44f2..0260fc9718c444 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp @@ -160,7 +160,8 @@ DQMatMulCWi::DQMatMulCWi(Context::Ref ctx) { auto qcoeff_shape = matched_node_qcoeff->output(0).get_shape(); if ((ov::element::i4 == matched_qweight->get_element_type() || - ov::element::i8 == matched_qweight->get_element_type()) && + ov::element::i8 == matched_qweight->get_element_type() || + ov::element::nf4 == matched_qweight->get_element_type()) && (ov::op::util::is_parameter(matched_node_qcoeff) || ov::op::util::is_constant(matched_node_qcoeff)) && qcoeff_shape[1] == 1 && !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) { auto matched_node_cvtw = node_to_output.at(qcvtw).get_node_shared_ptr(); From ae1fbbe52aa8177ae3799a49bb8066729445a6fd Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 23 Dec 2024 21:21:17 +0400 Subject: [PATCH 5/8] [GHA][JAX FE] Separate JAX layer tests to special job and have dedicated req file (#28178) **Details:** Separate JAX layer tests to special job and have dedicated req file **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- .github/workflows/job_jax_layer_tests.yml | 133 ++++++++++++++++++ .github/workflows/job_jax_models_tests.yml | 2 +- .github/workflows/job_python_unit_tests.yml | 8 -- .github/workflows/linux_arm64.yml | 10 ++ .github/workflows/mac.yml | 9 ++ .github/workflows/mac_arm64.yml | 9 ++ .github/workflows/ubuntu_22.yml | 10 ++ .github/workflows/ubuntu_24.yml | 10 ++ .github/workflows/windows_vs2019_release.yml | 9 ++ tests/CMakeLists.txt | 2 +- .../test_tf_UnaryOpsAllRealDomain.py | 2 +- tests/model_hub_tests/jax/requirements.txt | 10 -- tests/requirements_jax | 13 ++ tests/requirements_tensorflow | 2 +- 14 files changed, 207 insertions(+), 22 deletions(-) create mode 100644 .github/workflows/job_jax_layer_tests.yml delete mode 100644 tests/model_hub_tests/jax/requirements.txt create mode 100644 tests/requirements_jax diff --git a/.github/workflows/job_jax_layer_tests.yml b/.github/workflows/job_jax_layer_tests.yml new file mode 100644 index 
00000000000000..25f171060f43be --- /dev/null +++ b/.github/workflows/job_jax_layer_tests.yml @@ -0,0 +1,133 @@ +name: JAX Layer Tests + +on: + workflow_call: + inputs: + runner: + description: 'Machine on which the tests would run' + type: string + required: true + container: + description: 'JSON to be converted to the value of the "container" configuration for the job' + type: string + required: false + default: '{"image": null}' + affected-components: + description: 'Components that are affected by changes in the commit defined by the Smart CI Action' + type: string + required: true + python-version: + description: 'Python version to setup. E.g., "3.11"' + type: string + required: true + +permissions: read-all + +env: + PIP_CACHE_PATH_LINUX: /mount/caches/pip/linux + PIP_CACHE_PATH_WIN: "C:\\mount\\caches\\pip\\win" + +jobs: + JAX_Layer_Tests: + name: JAX Layer Tests + timeout-minutes: 40 + runs-on: ${{ inputs.runner }} + container: ${{ fromJSON(inputs.container) }} + defaults: + run: + shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }} + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + OPENVINO_REPO: ${{ github.workspace }}/openvino + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels + LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests + steps: + - name: Download OpenVINO artifacts (tarballs) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[tests]* + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + + # Needed as ${{ github.workspace }} is not working correctly when using Docker + - name: Setup Variables + if: runner.os != 'Windows' + run: | + echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" + echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" + echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" + echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + + - name: Extract OpenVINO packages (Linux, macOS) + if: runner.os != 'Windows' + run: | + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Extract OpenVINO artifacts (Windows) + if: runner.os == 'Windows' + run: | + Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Fetch setup_python and install wheels actions + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + timeout-minutes: 15 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ inputs.python-version }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ inputs.python-version }} + pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH_LINUX || env.PIP_CACHE_PATH_WIN }} + should-setup-pip-paths: ${{ 
runner.os != 'macOS' }} + self-hosted-runner: ${{ runner.os != 'macOS' }} + + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' + + - name: Install JAX Layer tests dependencies + run: | + # jax test requirements + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax + + - name: JAX Layer Tests + if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 + run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/jax_tests ${PARALLEL} -m precommit_jax_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-jax.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + JAX_TRACE_MODE: JAXPR + PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}} + + - name: Upload Test Results + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + if: ${{ !cancelled() }} + with: + name: test-results-python-jax-layers + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.html + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'warn' diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 07155db1016057..57eb07a83aa423 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -89,7 +89,7 @@ jobs: - name: Install JAX tests requirements for precommit run: | - python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/jax/requirements.txt + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax - name: JAX/Flax Models Tests from Hugging Face if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly' }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index b04f719c8e296f..e1532d530ff2db 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -162,14 +162,6 @@ jobs: export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml - - name: JAX Layer Tests - JAX FE - if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' && runner.os != 'macOS' }} - run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit_jax_fe --junitxml=${INSTALL_TEST_DIR}/TEST-jax_fe.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - JAX_TRACE_MODE: JAXPR - - name: TensorFlow Lite Layer Tests - TFL FE if: fromJSON(inputs.affected-components).TFL_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_lite_tests/ -n logical --junitxml=${INSTALL_TEST_DIR}/TEST-tfl_fe.xml diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 66e825e5d5e126..ca1ca6e056e23d 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -202,6 +202,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Docker, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-16-cores-32gb-arm' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + 
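For local runs, a rough shell equivalent of this job's test step is sketched below. The repository-relative paths (`tests/requirements_jax`, `tests/layer_tests/jax_tests`) are assumptions inferred from the install layout above; the environment variables, pytest marker, and `-n logical` flag come directly from the workflow.

```sh
# Sketch only: mirror the JAX Layer Tests job on a local checkout.
# Assumes an OpenVINO wheel is already installed in the active environment.
python3 -m pip install -r tests/requirements_jax            # pinned JAX/Flax test deps
export TEST_DEVICE=CPU TEST_PRECISION=FP16 JAX_TRACE_MODE=JAXPR
python3 -m pytest tests/layer_tests/jax_tests -n logical -m precommit_jax_fe
```
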
CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 5e4335b8151c02..0fbc20cf19594b 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -356,6 +356,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'macos-13' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests # if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 855d76973cc2e4..b60daefa442c83 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -355,6 +355,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'macos-13-xlarge' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 5aed74bbb242b8..e5c7d25003de1e 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -334,6 +334,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index 25be095e692d35..beac15bfbda97d 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -156,6 +156,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index f1fd0be596baa2..de33f2603d7430 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -499,6 +499,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer 
Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-win-8-cores-16gb' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CXX_Unit_Tests: name: C++ unit tests needs: [ Build, Smart_CI ] diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 08b4308479ef03..de3ad80280d603 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,5 +7,5 @@ add_subdirectory(model_hub_tests) add_subdirectory(samples_tests) add_subdirectory(e2e_tests) -install(FILES requirements_pytorch requirements_tensorflow requirements_onnx +install(FILES requirements_pytorch requirements_tensorflow requirements_onnx requirements_jax DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py index 4ff4d589cbae32..5c1037e38cfc84 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py @@ -67,4 +67,4 @@ def test_unary_ops(self, input_shape, input_type, op_type, pytest.skip("159585: accuracy error on ARM") self._test(*self.create_unary_net(input_shape, input_type, op_type), ie_device, precision, ir_version, temp_dir=temp_dir, - use_legacy_frontend=use_legacy_frontend, custom_eps=1e-3) + use_legacy_frontend=use_legacy_frontend, custom_eps=3 * 1e-3) diff --git a/tests/model_hub_tests/jax/requirements.txt b/tests/model_hub_tests/jax/requirements.txt deleted file mode 100644 index 328084ac050ca6..00000000000000 --- a/tests/model_hub_tests/jax/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ --c ../../constraints.txt -numpy -pytest -pytest-html -transformers -requests -jax -jaxlib -flax -pillow \ No newline at end of file diff --git a/tests/requirements_jax b/tests/requirements_jax new file mode 100644 index 00000000000000..c392df4359bee3 --- /dev/null +++ b/tests/requirements_jax @@ -0,0 +1,13 @@ +numpy==1.26.4; python_version < "3.12" or platform_system == "Darwin" and platform_machine == "x86_64" +numpy==2.2.1; python_version >= "3.12" and (platform_system != "Darwin" or platform_machine != "x86_64") +pytest==7.0.1 +pytest-xdist[psutil]==3.6.1 +pytest-html==4.1.1 +jax==0.4.38; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9" +# tensorflow 2.16.2 depends on ml-dtypes~=0.3.1 and jax 0.4.35 depends on ml-dtypes>=0.4.0 +jax==0.4.33; (platform_system == "Darwin" and platform_machine == "x86_64") and python_version > "3.9" +jax==0.4.30; python_version <= "3.9" +flax==0.10.2 +transformers==4.47.1 +defusedxml +pillow diff --git a/tests/requirements_tensorflow b/tests/requirements_tensorflow index 5369b0135f7618..8e0d1141695ef9 100644 --- a/tests/requirements_tensorflow +++ b/tests/requirements_tensorflow @@ -17,7 +17,7 @@ wrapt==1.15.0; python_version >= "3.12" # tensorflow-text is not available for both Windows and ARM platforms tensorflow-text==2.18.0; python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64" tensorflow-hub==0.16.1 -jax==0.4.35; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9" +jax==0.4.38; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9" # tensorflow 2.16.2 depends on ml-dtypes~=0.3.1 and jax 0.4.35 depends on ml-dtypes>=0.4.0 jax==0.4.33; (platform_system == "Darwin" and platform_machine == "x86_64") and 
python_version > "3.9" jax==0.4.30; python_version <= "3.9" From 92edc910c54e8b322dc75558c3e7fed0738e9797 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Tue, 24 Dec 2024 10:07:24 +0900 Subject: [PATCH 6/8] [GPU] Fix ConvolutionKernel_b_fs_yx_fsv16_1x1 to support input0 feature dynamic case (#28156) ### Details: - Fix ConvolutionKernel_b_fs_yx_fsv16_1x1 to support input0 feature dynamic case ### Tickets: - 146681 --- .../cl_kernels/convolution_gpu_bfyx_f16_1x1.cl | 5 ++--- .../convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp | 2 ++ .../tests/unit/test_cases/convolution_gpu_test.cpp | 9 ++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl index 542fa69ebc241b..109fa2de9841aa 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl @@ -122,8 +122,8 @@ KERNEL(convolution_b_fs_yx_fsv16_1x1)( { #endif // SLM_DIV_FACTOR > 1 vec_t src = 0; -#if INPUT_LEFTOVERS - if ((k + 1) * FEATURE_SLICE_SIZE >= INPUT0_FEATURE_NUM) + + if (INPUT_LEFTOVERS && ((k + 1) * FEATURE_SLICE_SIZE >= INPUT0_FEATURE_NUM)) { if (k * FEATURE_SLICE_SIZE + sglid < INPUT0_FEATURE_NUM) { @@ -143,7 +143,6 @@ KERNEL(convolution_b_fs_yx_fsv16_1x1)( } } else -#endif // INPUT_LEFTOVERS { #if PADDED_INPUT #if X_BLOCK_SIZE > 1 diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp index 6fd074f8d8506d..7150d51ecf1e48 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp @@ -264,6 +264,8 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolut } if (params.inputs[0].Feature().v % tuning_data.feature_block_size != 0) { jit.AddConstant(MakeJitConstant("INPUT_LEFTOVERS", 1)); + } else { + jit.AddConstant(MakeJitConstant("INPUT_LEFTOVERS", 0)); } } else { DimensionAccessHelperJit input0_dims(params.inputs[0]); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index f0243f055c3670..13934020bfdf66 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -10820,7 +10820,14 @@ TEST_P(conv_dyn_test, convolution_gpu_fsv16_1x1_no_bias) { return outputs_ref.at("conv").get_memory(); }; - auto in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(p.in_shape[1]), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + cldnn::layout in_layout; + if (p.in_shape[2] % 2 == 0) { + // input feature is static + in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(p.in_shape[1]), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + } else { + // input feature is dynamic + in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + } auto input = engine.allocate_memory({ p.in_shape, data_types::f16, 
format::b_fs_yx_fsv16 }); auto weights = engine.allocate_memory({p.wei_shape, data_types::f16, is_grouped ? format::bfzyx : format::bfyx}); From f62b94f0cd924ba9414b892dd270248059ff16ba Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Tue, 24 Dec 2024 11:09:29 +0800 Subject: [PATCH 7/8] support offline CPU in Linux (#28149) ### Details: - *support offline CPU in Linux* - *Ignore SOC Ecore of MTL* - *enable Ecore of LNL* - *similar PR of [PR 27870](https://github.com/openvinotoolkit/openvino/pull/27870) which is reverted* ### Tickets: - *CVS-154222, CVS-159641* - *[issues-26889](https://github.com/openvinotoolkit/openvino/issues/26889)* --- src/inference/src/os/lin/lin_system_conf.cpp | 366 ++++++++++-------- .../cpu_map_parser/cache_parser_linux.cpp | 245 ++++++++++++ .../unit/cpu_map_parser/freq_parser_linux.cpp | 183 +++++++++ 3 files changed, 642 insertions(+), 152 deletions(-) diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index 9b6247c6691814..64da4cb0ac836a 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -23,76 +23,107 @@ CPU::CPU() { std::vector> system_info_table; std::vector node_info_table; - auto get_cache_info_linux = [&]() { + constexpr int cache_info_mode = 1; + constexpr int freq_info_mode = 2; + + auto get_info_linux = [&](int mode) { int cpu_index = 0; - int cache_index = 0; - int cache_files = 3; + int file_index = 0; + int max_files = 3; - std::vector one_info(cache_files); + std::string one_info; - while (1) { - for (int n = 0; n < cache_files; n++) { - cache_index = (n == 0) ? n : n + 1; - - std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + "/cache/index" + - std::to_string(cache_index) + "/shared_cpu_list"); - if (!cache_file.is_open()) { - cache_index = -1; - break; - } - std::string cache_info; - std::getline(cache_file, cache_info); - one_info[n] = std::move(cache_info); - } + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + std::string sub_str; - if (cache_index == -1) { - if (cpu_index == 0) { - return -1; - } else { - return 0; - } - } else { - system_info_table.push_back(one_info); - cpu_index++; - } + int core_1; + int core_2; + + system_info_table.clear(); + + std::ifstream possible_file("/sys/devices/system/cpu/possible"); + std::string possible_info; + + if (possible_file.is_open()) { + std::getline(possible_file, possible_info); + } else { + return -1; } - return 0; - }; + if ((endpos = possible_info.find('-', pos)) != std::string::npos) { + sub_str = possible_info.substr(pos, endpos - pos); + core_1 = std::stoi(sub_str); + sub_str = possible_info.substr(endpos + 1); + core_2 = std::stoi(sub_str); + system_info_table.resize(core_2 + 1, std::vector(max_files, "")); + } else { + return -1; + } - auto get_freq_info_linux = [&]() { - int cpu_index = 0; - int cache_index = 0; + std::ifstream online_file("/sys/devices/system/cpu/online"); + std::string online_info; - std::vector file_name = {"/topology/core_cpus_list", - "/topology/physical_package_id", - "/cpufreq/cpuinfo_max_freq"}; - int num_of_files = file_name.size(); - std::vector one_info(num_of_files); + if (online_file.is_open()) { + std::getline(online_file, online_info); + } else { + system_info_table.clear(); + return -1; + } while (1) { - for (int n = 0; n < num_of_files; n++) { - cache_index = n; + if ((endpos = online_info.find('-', pos)) != std::string::npos) { + sub_str = online_info.substr(pos, endpos - pos); + core_1 
= std::stoi(sub_str); + sub_str = online_info.substr(endpos + 1); + core_2 = std::stoi(sub_str); - std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + file_name[n]); - if (!cache_file.is_open()) { - cache_index = -1; - break; + for (cpu_index = core_1; cpu_index <= core_2; cpu_index++) { + if (mode == cache_info_mode) { + for (int n = 0; n < max_files; n++) { + file_index = (n == 0) ? n : n + 1; + one_info.clear(); + + std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + + "/cache/index" + std::to_string(file_index) + "/shared_cpu_list"); + if (cache_file.is_open()) { + std::getline(cache_file, one_info); + } else { + if ((cpu_index == core_1) && (n == 0)) { + system_info_table.clear(); + return -1; + } + } + system_info_table[cpu_index][n] = std::move(one_info); + } + } else { + std::vector file_name = {"/topology/core_cpus_list", + "/topology/physical_package_id", + "/cpufreq/cpuinfo_max_freq"}; + + for (int n = 0; n < max_files; n++) { + one_info.clear(); + + std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + + file_name[n]); + if (cache_file.is_open()) { + std::getline(cache_file, one_info); + } else { + if ((cpu_index == core_1) && (n == 2)) { + system_info_table.clear(); + return -1; + } + } + system_info_table[cpu_index][n] = std::move(one_info); + } + } } - std::string cache_info; - std::getline(cache_file, cache_info); - one_info[n] = std::move(cache_info); } - if (cache_index == -1) { - if (cpu_index == 0) { - return -1; - } else { - return 0; - } + if ((pos = online_info.find(',', endpos)) != std::string::npos) { + pos++; } else { - system_info_table.push_back(one_info); - cpu_index++; + break; } } @@ -202,7 +233,7 @@ CPU::CPU() { get_node_info_linux(); - if (!get_cache_info_linux()) { + if (!get_info_linux(cache_info_mode)) { parse_cache_info_linux(system_info_table, node_info_table, _processors, @@ -216,7 +247,7 @@ CPU::CPU() { if ((_proc_type_table.size() == 0) || ((_proc_type_table[0][MAIN_CORE_PROC] == 0) && (_proc_type_table[0][ALL_PROC] > 0) && (_proc_type_table[0][ALL_PROC] != _proc_type_table[0][EFFICIENT_CORE_PROC]))) { - if (!get_freq_info_linux()) { + if (!get_info_linux(freq_info_mode)) { parse_freq_info_linux(system_info_table, node_info_table, _processors, @@ -472,56 +503,73 @@ void parse_cache_info_linux(const std::vector> system_i const std::vector line_value_0({0, 0, 0, 0, -1, -1}); - for (int n = 0; n < _processors; n++) { - if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { - std::string::size_type pos = 0; - std::string::size_type endpos = 0; - std::string sub_str; - - int core_1; - int core_2; + std::vector offline_list; + int info_index = 0; - if (0 == _sockets) { - _proc_type_table.push_back(line_value_0); - } else { - _proc_type_table.push_back(_proc_type_table[0]); - _proc_type_table[0] = line_value_0; - } - - while (1) { - if ((endpos = system_info_table[n][2].find('-', pos)) != std::string::npos) { - sub_str = system_info_table[n][2].substr(pos, endpos - pos); - core_1 = std::stoi(sub_str); - sub_str = system_info_table[n][2].substr(endpos + 1); - core_2 = std::stoi(sub_str); + for (int n = 0; n < _processors; n++) { + if ((system_info_table[n][2].size() > 0) || (system_info_table[n][1].size() > 0)) { + info_index = system_info_table[n][2].size() > 0 ? 
2 : 1; + if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + std::string sub_str; + + int core_1; + int core_2; + + if (0 == _sockets) { + _proc_type_table.push_back(line_value_0); + } else { + _proc_type_table.push_back(_proc_type_table[0]); + _proc_type_table[0] = line_value_0; + } - for (int m = core_1; m <= core_2; m++) { - _cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets; - _cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID]; - update_proc_map_info(m); + while (1) { + if ((endpos = system_info_table[n][info_index].find('-', pos)) != std::string::npos) { + sub_str = system_info_table[n][info_index].substr(pos, endpos - pos); + core_1 = std::stoi(sub_str); + sub_str = system_info_table[n][info_index].substr(endpos + 1); + core_2 = std::stoi(sub_str); + + if ((info_index == 1) && (core_2 - core_1 == 1)) { + offline_list.push_back(n); + break; + } + for (int m = core_1; m <= core_2; m++) { + _cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets; + _cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID]; + update_proc_map_info(m); + if (_processors == 0) { + return; + }; + } + } else if (pos != std::string::npos) { + sub_str = system_info_table[n][info_index].substr(pos); + core_1 = std::stoi(sub_str); + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets; + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + update_proc_map_info(core_1); if (_processors == 0) { return; }; + endpos = pos; } - } else if (pos != std::string::npos) { - sub_str = system_info_table[n][2].substr(pos); - core_1 = std::stoi(sub_str); - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets; - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - update_proc_map_info(core_1); - if (_processors == 0) { - return; - }; - endpos = pos; - } - if ((pos = system_info_table[n][2].find(',', endpos)) != std::string::npos) { - pos++; - } else { - break; + if ((pos = system_info_table[n][2].find(',', endpos)) != std::string::npos) { + pos++; + } else { + break; + } + } + _sockets++; + if (_proc_type_table[0][ALL_PROC] == 0) { + _proc_type_table.erase(_proc_type_table.begin()); + _sockets--; } } - _sockets++; + } else { + offline_list.push_back(n); } } @@ -541,6 +589,11 @@ void parse_cache_info_linux(const std::vector> system_i _numa_nodes = node_info_table.size(); parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table); } + + for (size_t n = 0; n < offline_list.size(); n++) { + _cpu_mapping_table.erase(_cpu_mapping_table.begin() + offline_list[n] - n); + _processors--; + } }; void get_cpu_mapping_from_cores(const int _processors, @@ -616,7 +669,6 @@ void parse_freq_info_linux(const std::vector> system_in std::vector>& _cpu_mapping_table) { int freq_max = 0; bool ecore_enabled = false; - bool ht_enabled = false; _processors = system_info_table.size(); _numa_nodes = 0; @@ -626,6 +678,8 @@ void parse_freq_info_linux(const std::vector> system_in std::vector line_value_0(PROC_TYPE_TABLE_SIZE, 0); + std::vector offline_list; + auto clean_up_output = [&]() { _processors = 0; _cores = 0; @@ -637,65 +691,68 @@ void parse_freq_info_linux(const std::vector> system_in }; for (int n = 0; n < _processors; n++) { - if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { - std::string::size_type pos = 0; - std::string::size_type endpos1 = 0; - 
std::string::size_type endpos2 = 0; - std::string sub_str; - - int core_1 = 0; - int core_2 = 0; - - if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) || - ((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) { - endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2; - sub_str = system_info_table[n][0].substr(pos, endpos1 - pos); - core_1 = std::stoi(sub_str); - sub_str = system_info_table[n][0].substr(endpos1 + 1); - core_2 = std::stoi(sub_str); - if ((core_1 != n) && (core_2 != n)) { - clean_up_output(); - return; - } - - _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC; - _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + if (system_info_table[n][2].size() > 0) { + if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { + std::string::size_type pos = 0; + std::string::size_type endpos1 = 0; + std::string::size_type endpos2 = 0; + std::string sub_str; + + int core_1 = 0; + int core_2 = 0; + + if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) || + ((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) { + endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2; + sub_str = system_info_table[n][0].substr(pos, endpos1 - pos); + core_1 = std::stoi(sub_str); + sub_str = system_info_table[n][0].substr(endpos1 + 1); + core_2 = std::stoi(sub_str); + if ((core_1 != n) && (core_2 != n)) { + clean_up_output(); + return; + } - _cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2; - _cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID]; - _cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; - _cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID]; + _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC; + _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + + _cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2; + _cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID]; + _cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; + _cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID]; + + int core_freq = std::stoi(system_info_table[core_1][2]); + freq_max = std::max(core_freq, freq_max); + } else if (system_info_table[n][0].size() > 0) { + core_1 = std::stoi(system_info_table[n][0]); - ht_enabled = true; - int core_freq = std::stoi(system_info_table[core_1][2]); - freq_max = 
std::max(core_freq, freq_max); - } else if (system_info_table[n][0].size() > 0) { - core_1 = std::stoi(system_info_table[n][0]); + _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; - _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; + int core_freq = std::stoi(system_info_table[core_1][2]); + if ((0 == freq_max) || (core_freq >= freq_max * 0.97)) { + freq_max = std::max(core_freq, freq_max); + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; + } else { + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC; + ecore_enabled = true; + } - int core_freq = std::stoi(system_info_table[core_1][2]); - if (((0 == freq_max) || (core_freq >= freq_max * 0.95)) && (!ht_enabled)) { - freq_max = std::max(core_freq, freq_max); - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; - } else { - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC; - ecore_enabled = true; + _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; } - - _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + _sockets = std::max(_sockets, _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]); + _cores++; } - _sockets = std::max(_sockets, _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]); - _cores++; + } else { + offline_list.push_back(n); } } @@ -734,6 +791,11 @@ void parse_freq_info_linux(const std::vector> system_in _numa_nodes = node_info_table.size(); parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table); } + + for (size_t n = 0; n < offline_list.size(); n++) { + _cpu_mapping_table.erase(_cpu_mapping_table.begin() + offline_list[n] - n); + _processors--; + } }; void update_valid_processor_linux(const std::vector phy_core_list, diff --git a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp index 8679090b9ae491..9ea43bd0604296 100644 --- a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp +++ b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp @@ -385,6 +385,188 @@ LinuxCpuMapTestCase cache_1sockets_96cores = { {"0-95"}, }, }; +LinuxCpuMapTestCase cache_2sockets_56cores_hyperthreading = { + 110, + 2, + 2, + 56, + {{110, 56, 0, 54, -1, -1}, {54, 28, 0, 26, 0, 0}, {56, 28, 0, 28, 1, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1}, + {11, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {12, 0, 0, 11, HYPER_THREADING_PROC, 11, -1}, + {13, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {14, 0, 0, 13, HYPER_THREADING_PROC, 13, -1}, + {15, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {16, 0, 0, 15, HYPER_THREADING_PROC, 15, -1}, + {17, 0, 0, 16, 
HYPER_THREADING_PROC, 16, -1}, {18, 0, 0, 17, HYPER_THREADING_PROC, 17, -1}, + {19, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {21, 0, 0, 19, HYPER_THREADING_PROC, 19, -1}, + {22, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {23, 0, 0, 21, HYPER_THREADING_PROC, 21, -1}, + {24, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {25, 0, 0, 23, HYPER_THREADING_PROC, 23, -1}, + {26, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {27, 0, 0, 25, HYPER_THREADING_PROC, 25, -1}, + {28, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {29, 1, 1, 29, HYPER_THREADING_PROC, 29, -1}, + {30, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {31, 1, 1, 31, HYPER_THREADING_PROC, 31, -1}, + {32, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {33, 1, 1, 33, HYPER_THREADING_PROC, 33, -1}, + {34, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {35, 1, 1, 35, HYPER_THREADING_PROC, 35, -1}, + {36, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {37, 1, 1, 37, HYPER_THREADING_PROC, 37, -1}, + {38, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {39, 1, 1, 39, HYPER_THREADING_PROC, 39, -1}, + {40, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {41, 1, 1, 41, HYPER_THREADING_PROC, 41, -1}, + {42, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {43, 1, 1, 43, HYPER_THREADING_PROC, 43, -1}, + {44, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {45, 1, 1, 45, HYPER_THREADING_PROC, 45, -1}, + {46, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {47, 1, 1, 47, HYPER_THREADING_PROC, 47, -1}, + {48, 1, 1, 48, HYPER_THREADING_PROC, 48, -1}, {49, 1, 1, 49, HYPER_THREADING_PROC, 49, -1}, + {50, 1, 1, 50, HYPER_THREADING_PROC, 50, -1}, {51, 1, 1, 51, HYPER_THREADING_PROC, 51, -1}, + {52, 1, 1, 52, HYPER_THREADING_PROC, 52, -1}, {53, 1, 1, 53, HYPER_THREADING_PROC, 53, -1}, + {54, 1, 1, 54, HYPER_THREADING_PROC, 54, -1}, {55, 1, 1, 55, HYPER_THREADING_PROC, 55, -1}, + {56, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {57, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {58, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {59, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {60, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {61, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {62, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {63, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {64, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {65, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {66, 0, 0, 26, MAIN_CORE_PROC, 26, -1}, {67, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, + {68, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, {69, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, + {70, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, {71, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, + {72, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, {73, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, + {74, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, {75, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, + {76, 0, 0, 27, MAIN_CORE_PROC, 27, -1}, {77, 0, 0, 19, MAIN_CORE_PROC, 19, -1}, + {78, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {79, 0, 0, 21, MAIN_CORE_PROC, 21, -1}, + {80, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {81, 0, 0, 23, MAIN_CORE_PROC, 23, -1}, + {82, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {83, 0, 0, 25, MAIN_CORE_PROC, 25, -1}, + {84, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {85, 1, 1, 29, MAIN_CORE_PROC, 29, -1}, + {86, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {87, 1, 1, 31, MAIN_CORE_PROC, 31, -1}, + {88, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {89, 1, 1, 33, MAIN_CORE_PROC, 33, -1}, + {90, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {91, 1, 1, 35, MAIN_CORE_PROC, 35, -1}, + {92, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {93, 1, 1, 37, MAIN_CORE_PROC, 37, -1}, + {94, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {95, 1, 1, 39, MAIN_CORE_PROC, 39, -1}, + {96, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {97, 1, 1, 41, MAIN_CORE_PROC, 41, -1}, + {98, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {99, 1, 1, 43, MAIN_CORE_PROC, 43, -1}, + 
{100, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {101, 1, 1, 45, MAIN_CORE_PROC, 45, -1}, + {102, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {103, 1, 1, 47, MAIN_CORE_PROC, 47, -1}, + {104, 1, 1, 48, MAIN_CORE_PROC, 48, -1}, {105, 1, 1, 49, MAIN_CORE_PROC, 49, -1}, + {106, 1, 1, 50, MAIN_CORE_PROC, 50, -1}, {107, 1, 1, 51, MAIN_CORE_PROC, 51, -1}, + {108, 1, 1, 52, MAIN_CORE_PROC, 52, -1}, {109, 1, 1, 53, MAIN_CORE_PROC, 53, -1}, + {110, 1, 1, 54, MAIN_CORE_PROC, 54, -1}, {111, 1, 1, 55, MAIN_CORE_PROC, 55, -1}, + }, + { + {"0,56", "0,56", "0-9,11-19,21-27,56-83"}, + {"1,57", "1,57", "0-9,11-19,21-27,56-83"}, + {"2,58", "2,58", "0-9,11-19,21-27,56-83"}, + {"3,59", "3,59", "0-9,11-19,21-27,56-83"}, + {"4,60", "4,60", "0-9,11-19,21-27,56-83"}, + {"5,61", "5,61", "0-9,11-19,21-27,56-83"}, + {"6,62", "6,62", "0-9,11-19,21-27,56-83"}, + {"7,63", "7,63", "0-9,11-19,21-27,56-83"}, + {"8,64", "8,64", "0-9,11-19,21-27,56-83"}, + {"9,65", "9,65", "0-9,11-19,21-27,56-83"}, + {"", "", ""}, + {"11,67", "11,67", "0-9,11-19,21-27,56-83"}, + {"12,68", "12,68", "0-9,11-19,21-27,56-83"}, + {"13,69", "13,69", "0-9,11-19,21-27,56-83"}, + {"14,70", "14,70", "0-9,11-19,21-27,56-83"}, + {"15,71", "15,71", "0-9,11-19,21-27,56-83"}, + {"16,72", "16,72", "0-9,11-19,21-27,56-83"}, + {"17,73", "17,73", "0-9,11-19,21-27,56-83"}, + {"18,74", "18,74", "0-9,11-19,21-27,56-83"}, + {"19,75", "19,75", "0-9,11-19,21-27,56-83"}, + {"", "", ""}, + {"21,77", "21,77", "0-9,11-19,21-27,56-83"}, + {"22,78", "22,78", "0-9,11-19,21-27,56-83"}, + {"23,79", "23,79", "0-9,11-19,21-27,56-83"}, + {"24,80", "24,80", "0-9,11-19,21-27,56-83"}, + {"25,81", "25,81", "0-9,11-19,21-27,56-83"}, + {"26,82", "26,82", "0-9,11-19,21-27,56-83"}, + {"27,83", "27,83", "0-9,11-19,21-27,56-83"}, + {"28,84", "28,84", "28-55,84-111"}, + {"29,85", "29,85", "28-55,84-111"}, + {"30,86", "30,86", "28-55,84-111"}, + {"31,87", "31,87", "28-55,84-111"}, + {"32,88", "32,88", "28-55,84-111"}, + {"33,89", "33,89", "28-55,84-111"}, + {"34,90", "34,90", "28-55,84-111"}, + {"35,91", "35,91", "28-55,84-111"}, + {"36,92", "36,92", "28-55,84-111"}, + {"37,93", "37,93", "28-55,84-111"}, + {"38,94", "38,94", "28-55,84-111"}, + {"39,95", "39,95", "28-55,84-111"}, + {"40,96", "40,96", "28-55,84-111"}, + {"41,97", "41,97", "28-55,84-111"}, + {"42,98", "42,98", "28-55,84-111"}, + {"43,99", "43,99", "28-55,84-111"}, + {"44,100", "44,100", "28-55,84-111"}, + {"45,101", "45,101", "28-55,84-111"}, + {"46,102", "46,102", "28-55,84-111"}, + {"47,103", "47,103", "28-55,84-111"}, + {"48,104", "48,104", "28-55,84-111"}, + {"49,105", "49,105", "28-55,84-111"}, + {"50,106", "50,106", "28-55,84-111"}, + {"51,107", "51,107", "28-55,84-111"}, + {"52,108", "52,108", "28-55,84-111"}, + {"53,109", "53,109", "28-55,84-111"}, + {"54,110", "54,110", "28-55,84-111"}, + {"55,111", "55,111", "28-55,84-111"}, + {"0,56", "0,56", "0-9,11-19,21-27,56-83"}, + {"1,57", "1,57", "0-9,11-19,21-27,56-83"}, + {"2,58", "2,58", "0-9,11-19,21-27,56-83"}, + {"3,59", "3,59", "0-9,11-19,21-27,56-83"}, + {"4,60", "4,60", "0-9,11-19,21-27,56-83"}, + {"5,61", "5,61", "0-9,11-19,21-27,56-83"}, + {"6,62", "6,62", "0-9,11-19,21-27,56-83"}, + {"7,63", "7,63", "0-9,11-19,21-27,56-83"}, + {"8,64", "8,64", "0-9,11-19,21-27,56-83"}, + {"9,65", "9,65", "0-9,11-19,21-27,56-83"}, + {"66", "66", "0-9,11-19,21-27,56-83"}, + {"11,67", "11,67", "0-9,11-19,21-27,56-83"}, + {"12,68", "12,68", "0-9,11-19,21-27,56-83"}, + {"13,69", "13,69", "0-9,11-19,21-27,56-83"}, + {"14,70", "14,70", "0-9,11-19,21-27,56-83"}, + {"15,71", "15,71", 
"0-9,11-19,21-27,56-83"}, + {"16,72", "16,72", "0-9,11-19,21-27,56-83"}, + {"17,73", "17,73", "0-9,11-19,21-27,56-83"}, + {"18,74", "18,74", "0-9,11-19,21-27,56-83"}, + {"19,75", "19,75", "0-9,11-19,21-27,56-83"}, + {"76", "76", "0-9,11-19,21-27,56-83"}, + {"21,77", "21,77", "0-9,11-19,21-27,56-83"}, + {"22,78", "22,78", "0-9,11-19,21-27,56-83"}, + {"23,79", "23,79", "0-9,11-19,21-27,56-83"}, + {"24,80", "24,80", "0-9,11-19,21-27,56-83"}, + {"25,81", "25,81", "0-9,11-19,21-27,56-83"}, + {"26,82", "26,82", "0-9,11-19,21-27,56-83"}, + {"27,83", "27,83", "0-9,11-19,21-27,56-83"}, + {"28,84", "28,84", "28-55,84-111"}, + {"29,85", "29,85", "28-55,84-111"}, + {"30,86", "30,86", "28-55,84-111"}, + {"31,87", "31,87", "28-55,84-111"}, + {"32,88", "32,88", "28-55,84-111"}, + {"33,89", "33,89", "28-55,84-111"}, + {"34,90", "34,90", "28-55,84-111"}, + {"35,91", "35,91", "28-55,84-111"}, + {"36,92", "36,92", "28-55,84-111"}, + {"37,93", "37,93", "28-55,84-111"}, + {"38,94", "38,94", "28-55,84-111"}, + {"39,95", "39,95", "28-55,84-111"}, + {"40,96", "40,96", "28-55,84-111"}, + {"41,97", "41,97", "28-55,84-111"}, + {"42,98", "42,98", "28-55,84-111"}, + {"43,99", "43,99", "28-55,84-111"}, + {"44,100", "44,100", "28-55,84-111"}, + {"45,101", "45,101", "28-55,84-111"}, + {"46,102", "46,102", "28-55,84-111"}, + {"47,103", "47,103", "28-55,84-111"}, + {"48,104", "48,104", "28-55,84-111"}, + {"49,105", "49,105", "28-55,84-111"}, + {"50,106", "50,106", "28-55,84-111"}, + {"51,107", "51,107", "28-55,84-111"}, + {"52,108", "52,108", "28-55,84-111"}, + {"53,109", "53,109", "28-55,84-111"}, + {"54,110", "54,110", "28-55,84-111"}, + {"55,111", "55,111", "28-55,84-111"}, + }, + { + {"0-9,11-19,21-27,56-83"}, + {"28-55,84-111"}, + }, +}; LinuxCpuMapTestCase cache_2sockets_48cores_hyperthreading = { 96, 2, @@ -1005,6 +1187,36 @@ LinuxCpuMapTestCase cache_2sockets_20cores_hyperthreading_1 = { }, {}, }; +LinuxCpuMapTestCase cache_1sockets_16cores_hyperthreading = { + 20, + 1, + 1, + 14, + {{20, 6, 8, 6, 0, 0}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, {3, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, + {4, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {5, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {12, 0, 0, 6, EFFICIENT_CORE_PROC, 6, -1}, {13, 0, 0, 7, EFFICIENT_CORE_PROC, 6, -1}, + {14, 0, 0, 8, EFFICIENT_CORE_PROC, 6, -1}, {15, 0, 0, 9, EFFICIENT_CORE_PROC, 6, -1}, + {16, 0, 0, 10, EFFICIENT_CORE_PROC, 7, -1}, {17, 0, 0, 11, EFFICIENT_CORE_PROC, 7, -1}, + {18, 0, 0, 12, EFFICIENT_CORE_PROC, 7, -1}, {19, 0, 0, 13, EFFICIENT_CORE_PROC, 7, -1}, + }, + { + {"0,5", "0,5", "0-19"}, {"1-2", "1-2", "0-19"}, {"1-2", "1-2", "0-19"}, {"3-4", "3-4", "0-19"}, + {"3-4", "3-4", "0-19"}, {"0,5", "0,5", "0-19"}, {"6-7", "6-7", "0-19"}, {"6-7", "6-7", "0-19"}, + {"8-9", "8-9", "0-19"}, {"8-9", "8-9", "0-19"}, {"10-11", "10-11", "0-19"}, {"10-11", "10-11", "0-19"}, + {"12", "12-15", "0-19"}, {"13", "12-15", "0-19"}, {"14", "12-15", "0-19"}, {"15", "12-15", "0-19"}, + {"16", "16-19", "0-19"}, {"17", "16-19", "0-19"}, {"18", "16-19", "0-19"}, {"19", "16-19", "0-19"}, + {"20", "20-21", ""}, {"21", "20-21", ""}, + }, + { + {"0-21"}, + }, +}; LinuxCpuMapTestCase cache_1sockets_14cores_hyperthreading = { 20, 1, @@ -1135,6 +1347,36 @@ 
LinuxCpuMapTestCase cache_1sockets_8cores_hyperthreading = { }, {{"0-11"}}, }; +LinuxCpuMapTestCase cache_1sockets_8cores_hyperthreading_1 = { + 8, + 1, + 1, + 8, + {{8, 4, 4, 0, 0, 0}}, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, + {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {4, 0, 0, 4, EFFICIENT_CORE_PROC, 4, -1}, + {5, 0, 0, 5, EFFICIENT_CORE_PROC, 4, -1}, + {6, 0, 0, 6, EFFICIENT_CORE_PROC, 4, -1}, + {7, 0, 0, 7, EFFICIENT_CORE_PROC, 4, -1}, + }, + { + {"0", "0", "0-3"}, + {"1", "1", "0-3"}, + {"2", "2", "0-3"}, + {"3", "3", "0-3"}, + {"4", "4-7", ""}, + {"5", "4-7", ""}, + {"6", "4-7", ""}, + {"7", "4-7", ""}, + }, + { + {"0-7"}, + }, +}; LinuxCpuMapTestCase cache_1sockets_6cores_hyperthreading = { 12, 1, @@ -1220,6 +1462,7 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, LinuxCpuMapCacheParserTests, testing::Values(cache_2sockets_104cores_hyperthreading, cache_1sockets_96cores, + cache_2sockets_56cores_hyperthreading, cache_2sockets_48cores_hyperthreading, cache_2sockets_48cores_hyperthreading_1, cache_2sockets_24cores_hyperthreading, @@ -1229,10 +1472,12 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, cache_2sockets_48cores_2, cache_2sockets_20cores_hyperthreading, cache_2sockets_20cores_hyperthreading_1, + cache_1sockets_16cores_hyperthreading, cache_1sockets_14cores_hyperthreading, cache_1sockets_14cores_hyperthreading_1, cache_1sockets_10cores_hyperthreading, cache_1sockets_8cores_hyperthreading, + cache_1sockets_8cores_hyperthreading_1, cache_1sockets_6cores_hyperthreading, cache_1sockets_4cores, cache_VM_cache_0)); diff --git a/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp b/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp index 04ab617961b953..8ccdfad011d19c 100644 --- a/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp +++ b/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp @@ -258,6 +258,188 @@ LinuxCpuMapTestCase freq_2sockets_112cores_hyperthreading = { }, // param[in]: The CPU frequency information table of this simulated platform {{"0-55,112-167"}, {"56-111,168-223"}}, // param[in]: The numa node information table of this simulated platform }; +LinuxCpuMapTestCase freq_2sockets_56cores_hyperthreading = { + 110, + 2, + 2, + 56, + {{110, 56, 0, 54, -1, -1}, {54, 28, 0, 26, 0, 0}, {56, 28, 0, 28, 1, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1}, + {11, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {12, 0, 0, 11, HYPER_THREADING_PROC, 11, -1}, + {13, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {14, 0, 0, 13, HYPER_THREADING_PROC, 13, -1}, + {15, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {16, 0, 0, 15, HYPER_THREADING_PROC, 15, -1}, + {17, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {18, 0, 0, 17, HYPER_THREADING_PROC, 17, -1}, + {19, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {21, 0, 0, 19, HYPER_THREADING_PROC, 19, -1}, + {22, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {23, 0, 0, 21, HYPER_THREADING_PROC, 21, -1}, + {24, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {25, 0, 0, 23, HYPER_THREADING_PROC, 23, -1}, + {26, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {27, 0, 0, 25, HYPER_THREADING_PROC, 25, 
-1}, + {28, 1, 1, 26, HYPER_THREADING_PROC, 26, -1}, {29, 1, 1, 27, HYPER_THREADING_PROC, 27, -1}, + {30, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {31, 1, 1, 29, HYPER_THREADING_PROC, 29, -1}, + {32, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {33, 1, 1, 31, HYPER_THREADING_PROC, 31, -1}, + {34, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {35, 1, 1, 33, HYPER_THREADING_PROC, 33, -1}, + {36, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {37, 1, 1, 35, HYPER_THREADING_PROC, 35, -1}, + {38, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {39, 1, 1, 37, HYPER_THREADING_PROC, 37, -1}, + {40, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {41, 1, 1, 39, HYPER_THREADING_PROC, 39, -1}, + {42, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {43, 1, 1, 41, HYPER_THREADING_PROC, 41, -1}, + {44, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {45, 1, 1, 43, HYPER_THREADING_PROC, 43, -1}, + {46, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {47, 1, 1, 45, HYPER_THREADING_PROC, 45, -1}, + {48, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {49, 1, 1, 47, HYPER_THREADING_PROC, 47, -1}, + {50, 1, 1, 48, HYPER_THREADING_PROC, 48, -1}, {51, 1, 1, 49, HYPER_THREADING_PROC, 49, -1}, + {52, 1, 1, 50, HYPER_THREADING_PROC, 50, -1}, {53, 1, 1, 51, HYPER_THREADING_PROC, 51, -1}, + {54, 1, 1, 52, HYPER_THREADING_PROC, 52, -1}, {55, 1, 1, 53, HYPER_THREADING_PROC, 53, -1}, + {56, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {57, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {58, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {59, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {60, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {61, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {62, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {63, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {64, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {65, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {66, 0, 0, 54, MAIN_CORE_PROC, 54, -1}, {67, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, + {68, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, {69, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, + {70, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, {71, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, + {72, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, {73, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, + {74, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, {75, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, + {76, 0, 0, 55, MAIN_CORE_PROC, 55, -1}, {77, 0, 0, 19, MAIN_CORE_PROC, 19, -1}, + {78, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {79, 0, 0, 21, MAIN_CORE_PROC, 21, -1}, + {80, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {81, 0, 0, 23, MAIN_CORE_PROC, 23, -1}, + {82, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {83, 0, 0, 25, MAIN_CORE_PROC, 25, -1}, + {84, 1, 1, 26, MAIN_CORE_PROC, 26, -1}, {85, 1, 1, 27, MAIN_CORE_PROC, 27, -1}, + {86, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {87, 1, 1, 29, MAIN_CORE_PROC, 29, -1}, + {88, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {89, 1, 1, 31, MAIN_CORE_PROC, 31, -1}, + {90, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {91, 1, 1, 33, MAIN_CORE_PROC, 33, -1}, + {92, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {93, 1, 1, 35, MAIN_CORE_PROC, 35, -1}, + {94, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {95, 1, 1, 37, MAIN_CORE_PROC, 37, -1}, + {96, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {97, 1, 1, 39, MAIN_CORE_PROC, 39, -1}, + {98, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {99, 1, 1, 41, MAIN_CORE_PROC, 41, -1}, + {100, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {101, 1, 1, 43, MAIN_CORE_PROC, 43, -1}, + {102, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {103, 1, 1, 45, MAIN_CORE_PROC, 45, -1}, + {104, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {105, 1, 1, 47, MAIN_CORE_PROC, 47, -1}, + {106, 1, 1, 48, MAIN_CORE_PROC, 48, -1}, {107, 1, 1, 49, MAIN_CORE_PROC, 49, -1}, + {108, 1, 1, 50, MAIN_CORE_PROC, 50, -1}, {109, 1, 1, 51, MAIN_CORE_PROC, 51, -1}, + {110, 1, 1, 52, 
MAIN_CORE_PROC, 52, -1}, {111, 1, 1, 53, MAIN_CORE_PROC, 53, -1}, + }, + { + {"0,56", "0", "3500000"}, + {"1,57", "0", "3500000"}, + {"2,58", "0", "3500000"}, + {"3,59", "0", "3500000"}, + {"4,60", "0", "3500000"}, + {"5,61", "0", "3500000"}, + {"6,62", "0", "3500000"}, + {"7,63", "0", "3500000"}, + {"8,64", "0", "3500000"}, + {"9,65", "0", "3500000"}, + {"", "", ""}, + {"11,67", "0", "3500000"}, + {"12,68", "0", "3500000"}, + {"13,69", "0", "3500000"}, + {"14,70", "0", "3500000"}, + {"15,71", "0", "3500000"}, + {"16,72", "0", "3500000"}, + {"17,73", "0", "3500000"}, + {"18,74", "0", "3500000"}, + {"19,75", "0", "3500000"}, + {"", "", ""}, + {"21,77", "0", "3500000"}, + {"22,78", "0", "3500000"}, + {"23,79", "0", "3500000"}, + {"24,80", "0", "3500000"}, + {"25,81", "0", "3500000"}, + {"26,82", "0", "3500000"}, + {"27,83", "0", "3500000"}, + {"28,84", "1", "3500000"}, + {"29,85", "1", "3500000"}, + {"30,86", "1", "3500000"}, + {"31,87", "1", "3500000"}, + {"32,88", "1", "3500000"}, + {"33,89", "1", "3500000"}, + {"34,90", "1", "3500000"}, + {"35,91", "1", "3500000"}, + {"36,92", "1", "3500000"}, + {"37,93", "1", "3500000"}, + {"38,94", "1", "3500000"}, + {"39,95", "1", "3500000"}, + {"40,96", "1", "3500000"}, + {"41,97", "1", "3500000"}, + {"42,98", "1", "3500000"}, + {"43,99", "1", "3500000"}, + {"44,100", "1", "3500000"}, + {"45,101", "1", "3500000"}, + {"46,102", "1", "3500000"}, + {"47,103", "1", "3500000"}, + {"48,104", "1", "3500000"}, + {"49,105", "1", "3500000"}, + {"50,106", "1", "3500000"}, + {"51,107", "1", "3500000"}, + {"52,108", "1", "3500000"}, + {"53,109", "1", "3500000"}, + {"54,110", "1", "3500000"}, + {"55,111", "1", "3500000"}, + {"0,56", "0", "3500000"}, + {"1,57", "0", "3500000"}, + {"2,58", "0", "3500000"}, + {"3,59", "0", "3500000"}, + {"4,60", "0", "3500000"}, + {"5,61", "0", "3500000"}, + {"6,62", "0", "3500000"}, + {"7,63", "0", "3500000"}, + {"8,64", "0", "3500000"}, + {"9,65", "0", "3500000"}, + {"66", "0", "3500000"}, + {"11,67", "0", "3500000"}, + {"12,68", "0", "3500000"}, + {"13,69", "0", "3500000"}, + {"14,70", "0", "3500000"}, + {"15,71", "0", "3500000"}, + {"16,72", "0", "3500000"}, + {"17,73", "0", "3500000"}, + {"18,74", "0", "3500000"}, + {"19,75", "0", "3500000"}, + {"76", "0", "3500000"}, + {"21,77", "0", "3500000"}, + {"22,78", "0", "3500000"}, + {"23,79", "0", "3500000"}, + {"24,80", "0", "3500000"}, + {"25,81", "0", "3500000"}, + {"26,82", "0", "3500000"}, + {"27,83", "0", "3500000"}, + {"28,84", "1", "3500000"}, + {"29,85", "1", "3500000"}, + {"30,86", "1", "3500000"}, + {"31,87", "1", "3500000"}, + {"32,88", "1", "3500000"}, + {"33,89", "1", "3500000"}, + {"34,90", "1", "3500000"}, + {"35,91", "1", "3500000"}, + {"36,92", "1", "3500000"}, + {"37,93", "1", "3500000"}, + {"38,94", "1", "3500000"}, + {"39,95", "1", "3500000"}, + {"40,96", "1", "3500000"}, + {"41,97", "1", "3500000"}, + {"42,98", "1", "3500000"}, + {"43,99", "1", "3500000"}, + {"44,100", "1", "3500000"}, + {"45,101", "1", "3500000"}, + {"46,102", "1", "3500000"}, + {"47,103", "1", "3500000"}, + {"48,104", "1", "3500000"}, + {"49,105", "1", "3500000"}, + {"50,106", "1", "3500000"}, + {"51,107", "1", "3500000"}, + {"52,108", "1", "3500000"}, + {"53,109", "1", "3500000"}, + {"54,110", "1", "3500000"}, + {"55,111", "1", "3500000"}, + }, + { + {"0-9,11-19,21-27,56-83"}, + {"28-55,84-111"}, + }, +}; LinuxCpuMapTestCase freq_2sockets_48cores_hyperthreading = { 96, 2, @@ -987,6 +1169,7 @@ TEST_P(LinuxCpuMapFreqParserTests, LinuxFreq) {} INSTANTIATE_TEST_SUITE_P(CPUMap, 
LinuxCpuMapFreqParserTests,
                         testing::Values(freq_2sockets_112cores_hyperthreading,
+                                        freq_2sockets_56cores_hyperthreading,
                                         freq_2sockets_48cores_hyperthreading,
                                         freq_2sockets_48cores_hyperthreading_1,
                                         freq_2sockets_24cores_hyperthreading,

From b4c81e0f39e9cd30752879e4db32487dadba7db0 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 24 Dec 2024 09:02:27 +0400
Subject: [PATCH 8/8] [TRANSFORMATIONS][GPU] SDPA Fusion passes (#28042)

### Details:
 - Added a basic SDPA fusion pass and fusion of QK scaling into SDPA (T5 case)

---------

Signed-off-by: Vladimir Paramuzov
---
 .../common_optimizations/sdpa_fusion.hpp      |  60 +++++
 .../sdpa_scale_fusion.hpp                     |  58 +++++
 .../moc_transformations.cpp                   |   2 +
 .../common_optimizations/sdpa_fusion.cpp      | 127 ++++++++++
 .../sdpa_scale_fusion.cpp                     | 140 +++++++++++
 .../common_optimizations/sdpa_fusion_test.cpp | 234 ++++++++++++++++++
 .../sdpa_scale_fusion_test.cpp                | 228 +++++++++++++++++
 .../transformation_pipeline.cpp               |   2 +
 .../src/plugin/transformations_pipeline.cpp   |   2 +
 9 files changed, 853 insertions(+)
 create mode 100644 src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
 create mode 100644 src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp
 create mode 100644 src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp
 create mode 100644 src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp
 create mode 100644 src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp
 create mode 100644 src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp

diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
new file mode 100644
index 00000000000000..84383b777604ea
--- /dev/null
+++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+/// This pass transforms the following sub-graph to a single Scaled Dot Product Attention operation.
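// Illustrative sketch (mirrors SDPAFusionTest1 later in this patch; not part
// of the upstream header): the matched sub-graph, built with the public C++
// API, looks like this:
//
//   auto qk      = std::make_shared<ov::op::v0::MatMul>(query, key, false, true);
//   auto softmax = std::make_shared<ov::op::v8::Softmax>(qk, -1);
//   auto qkv     = std::make_shared<ov::op::v0::MatMul>(softmax, value, false, false);
//
// After SDPAFusion runs, the three nodes collapse into a single
// ov::op::v13::ScaledDotProductAttention whose scale input is the constant
// 1.0f and whose attention mask is a zero constant when no Add-mask matched.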
+/// Before: +/// ┌───────┐ ┌───────┐ ┌───────┐ +/// │ Q │ │ K │ │ V │ +/// └───┬───┘ └───┬───┘ └───┬───┘ +/// │ │ │ +/// │ │ │ +/// ┌───┴───┐ ┌─────┴──────┐ │ +/// │ MatMul│<──│ Transpose │ │ +/// └───┬───┘ | (Optional) │ │ +/// │ └────────────┘ │ +/// ┌───┴───┐ ┌─────────────┐ │ +/// │ Add │<───│AttentionMask│ │ +/// └───┬───┘ | (Optional) │ │ +/// │ └─────────────┘ │ +/// ┌───┴───┐ │ +/// │Softmax│ │ +/// └───┬───┘ │ +/// │ │ +/// ┌───┴───┐ │ +/// │ MatMul│<─────────────────────┘ +/// └───┬───┘ +/// ┌───┴───┐ +/// │ Output│ +/// └───────┘ +/// +/// After: +/// ┌───────┐ ┌───────┐ ┌───────┐ ┌─────────────┐ +/// │ Q │ │ K │ │ V │ │AttentionMask│ +/// └───┬───┘ └───┬───┘ └───┬───┘ └──────┬──────┘ +/// │ │ │ │ +/// │ │ │ │ +/// ┌───┴────────────┴────────────┴───────────────┴─┐ +/// │ ScaledDotProductAttention │ +/// └────────────────────┬──────────────────────────┘ +/// │ +/// │ +/// ┌────┴────┐ +/// │ Output │ +/// └─────────┘ +class TRANSFORMATIONS_API SDPAFusion : public ov::pass::MatcherPass { +public: + OPENVINO_MATCHER_PASS_RTTI("SDPAFusion", "0"); + SDPAFusion(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp new file mode 100644 index 00000000000000..cae0363e785f4e --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +/// Merges explicit multiplication by scalar value for Q and K into scale attribute of SDPA op +/// Before: +/// ┌───────┐ ┌───────┐ ┌───────┐ ┌─────────────┐ ┌─────────────┐ +/// │ Q │ │ K │ │ V │ │AttentionMask│ │ Scale | +/// └───┬───┘ └───┬───┘ └───┬───┘ │ (Optional) │ │ (Optional) │ +/// │ │ │ └──────┬──────┘ └───────┬─────┘ +/// │ │ │ │ | +/// ┌───┴───┐ ┌───┴───┐ │ │ | +/// │ Mul | │ Mul │ | │ | +/// └───┬───┘ └───┬───┘ │ │ │ +/// │ │ │ │ │ +/// | │ │ │ │ +/// ┌───┴────────────┴────────────┴─────────────┴─┐ | +/// │ ScaledDotProductAttention │──────────────────┘ +/// └────────────────────┬────────────────────────┘ +/// │ +/// │ +/// ┌────┴────┐ +/// │ Output │ +/// └─────────┘ +/// After: +/// ┌───────┐ ┌───────┐ ┌───────┐ ┌─────────────┐ ┌───────┐ +/// │ Q │ │ K │ │ V │ │AttentionMask│ │ Scale | +/// └───┬───┘ └───┬───┘ └───┬───┘ └──────┬──────┘ └───┬───┘ +/// │ │ │ │ | +/// │ │ │ │ | +/// | │ │ │ | +/// ┌───┴────────────┴────────────┴─────────────┴─┐ | +/// │ ScaledDotProductAttention │───────────┘ +/// └────────────────────┬────────────────────────┘ +/// │ +/// │ +/// ┌────┴────┐ +/// │ Output │ +/// └─────────┘ +/// Multiply ops for Q and K are eliminated in the following cases: +/// 1. Q_scale and K_scale are constant +/// 2. 
Q_scale * SDPA_Scale == 1 or K_scale * SDPA_Scale == 1 +class TRANSFORMATIONS_API SDPAScaleFusion : public ov::pass::MatcherPass { +public: + OPENVINO_MATCHER_PASS_RTTI("SDPAScaleFusion", "0"); + SDPAScaleFusion(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 185ae84ec83642..23fbf882024bdc 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -65,6 +65,7 @@ #include "transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp" #include "transformations/common_optimizations/reshape_sequence_fusion.hpp" #include "transformations/common_optimizations/ric_fusion.hpp" +#include "transformations/common_optimizations/sdpa_fusion.hpp" #include "transformations/common_optimizations/select_with_one_value_condition.hpp" #include "transformations/common_optimizations/sequence_fusion.hpp" #include "transformations/common_optimizations/shared_ops_optimization.hpp" @@ -229,6 +230,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr ADD_MATCHER(common_fusions, ConvertTensorIteratorToSequence) ADD_MATCHER(common_fusions, SplitConcatPairToInterpolateFusion, m_use_shapes) ADD_MATCHER(common_fusions, ConvolutionToGroupConvolutionFusion) + ADD_MATCHER(common_fusions, SDPAFusion) if (m_use_shapes) { ADD_MATCHER(common_fusions, NearestNeighborUpsamplingFusion) } diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp new file mode 100644 index 00000000000000..fc581580f70001 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp @@ -0,0 +1,127 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/sdpa_fusion.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "openvino/pass/pattern/op/optional.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/gen_pattern.hpp" + +namespace ov { +namespace pass { + +SDPAFusion::SDPAFusion() { + using namespace ov::pass::pattern; + using namespace ov::gen_pattern; + + auto q = makePattern(ov::Rank(4)); + auto k = makePattern(ov::Rank(4)); + auto v = makePattern(ov::Rank(4)); + auto mask = makePattern(); + + auto k_transpose_order = pattern::wrap_type([](const Output& node) { + auto axis_order = + std::dynamic_pointer_cast(node.get_node_shared_ptr())->cast_vector(); + return axis_order == std::vector{0, 1, 3, 2}; + }); + + auto k_t = pattern::wrap_type({k, k_transpose_order}); + auto qk_nn = makePattern({q, k_t}, {{"transpose_a", false}, {"transpose_b", false}}); + auto qk_nt = makePattern({q, k}, {{"transpose_a", false}, {"transpose_b", true}}); + auto qk = qk_nt | qk_nn; + auto optional_add_mask = optional({qk, mask}); + auto softmax = 
makePattern({optional_add_mask}, {{"axis", "-1"}}); + auto qkv = makePattern({softmax, v}, {{"transpose_a", false}, {"transpose_b", false}}); + + auto valid_qk_shapes = [](const std::shared_ptr& qk_matmul) { + auto q_pshape = qk_matmul->get_input_partial_shape(0); + auto k_pshape = qk_matmul->get_input_partial_shape(1); + + const size_t q_head_size_idx = 3; + const size_t k_head_size_idx = qk_matmul->get_transpose_b() ? 3 : 2; + + return q_pshape.size() == 4 && k_pshape.size() == 4 && q_pshape[q_head_size_idx].is_static() && + k_pshape[k_head_size_idx].is_static() && + q_pshape[q_head_size_idx].get_length() == k_pshape[k_head_size_idx].get_length(); + }; + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (transformation_callback(m.get_match_root())) { + return false; + } + + auto q_node = pattern_map.at(q); + auto k_node = pattern_map.at(k); + auto v_node = pattern_map.at(v); + + if (!valid_qk_shapes(ov::as_type_ptr(pattern_map.at(qk).get_node_shared_ptr()))) { + return false; + } + + if (pattern_map.at(qk).get_target_inputs().size() > 1 || + pattern_map.at(softmax).get_target_inputs().size() > 1) { + return false; + } + if (pattern_map.count(optional_add_mask) && (pattern_map.at(optional_add_mask).get_target_inputs().size() > 1 || + pattern_map.at(mask).get_partial_shape().size() > 4)) { + return false; + } + + Output mask_value; + Output mask_input; + if (pattern_map.find(optional_add_mask) != pattern_map.end()) { + mask_value = pattern_map.at(mask); + } else { + mask_value = ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{0}); + } + + if (mask_value.get_partial_shape().size() > 4) { + return false; + } + + if (mask_value.get_partial_shape().rank() == 0 || mask_value.get_partial_shape().rank() == 4) { + mask_input = mask_value; + } else { + size_t rank_diff = q_node.get_partial_shape().size() - mask_value.get_partial_shape().size(); + std::vector axes(rank_diff); + std::iota(axes.begin(), axes.end(), 0); + mask_input = std::make_shared( + mask_value, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{rank_diff}, axes)); + } + + std::shared_ptr scale_node = + ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{1.0f}); + + std::shared_ptr sdpa = std::make_shared(q_node, + k_node, + v_node, + mask_input, + scale_node, + false); + + sdpa->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), sdpa); + ov::replace_node(m.get_match_root(), sdpa); + + return true; + }; + + auto m = std::make_shared(qkv, "SDPAFusion"); + this->register_matcher(m, callback); +} + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp new file mode 100644 index 00000000000000..3d750fe38a868e --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/sdpa_scale_fusion.hpp" + +#include + +#include "openvino/core/node.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/pass/pattern/op/optional.hpp" 
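// Sketch of the scale-folding arithmetic implemented by the matcher callback
// below, assuming the Q/K multipliers are scalar constants:
//
//   new_scale = prev_scale * q_scale * k_scale
//
// where prev_scale is the SDPA op's explicit scale input when one exists and
// 1 / sqrt(head_size) otherwise. For example, in SDPAScaleFusionTest1 below
// (head_size = 32, q_scale = k_scale = 8.0f, no explicit SDPA scale), the
// fused scale becomes 8 * 8 / sqrt(32) = 64.0f / std::sqrt(32.0f).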
+#include "openvino/pass/pattern/op/pattern.hpp" +#include "transformations/utils/gen_pattern.hpp" + +namespace ov { +namespace pass { + +SDPAScaleFusion::SDPAScaleFusion() { + using namespace ov::pass::pattern; + using namespace ov::gen_pattern; + + auto q = makePattern(ov::Rank(4)); + auto k = makePattern(ov::Rank(4)); + auto v = makePattern(ov::Rank(4)); + auto mask = makePattern(); + auto sdpa_scale = makeConst({}); + auto scale_q = makePattern("[]") | makePattern("[1]"); + auto scale_k = makePattern("[]") | makePattern("[1]"); + + auto scaled_q = optional({q, scale_q}); + auto scaled_k = optional({k, scale_k}); + auto sdpa_mask_scale = + makePattern({scaled_q, scaled_k, v, mask, sdpa_scale}, + {{"causal", false}}); + auto sdpa_mask = + makePattern({scaled_q, scaled_k, v, mask}, {{"causal", false}}); + auto sdpa_simple = + makePattern({scaled_q, scaled_k, v}, {{"causal", false}}); + auto sdpa = sdpa_simple | sdpa_mask | sdpa_mask_scale; + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (transformation_callback(m.get_match_root())) { + return false; + } + + auto sdpa = m.get_match_root(); + + const bool has_q_scale = pattern_map.count(scaled_q); + const bool has_k_scale = pattern_map.count(scaled_k); + + // Nothing to do + if (!has_q_scale && !has_k_scale) + return false; + + auto prev_scale_value = 1.0f; + auto scale_q_value = 1.0f; + auto scale_k_value = 1.0f; + auto scale_et = sdpa->get_output_element_type(0); + + Output q_input = sdpa->get_input_source_output(0); + Output k_input = sdpa->get_input_source_output(1); + + std::shared_ptr scale_q_node = nullptr; + std::shared_ptr scale_k_node = nullptr; + + if (pattern_map.find(sdpa_scale) != pattern_map.end()) { + auto prev_scale_node = + ov::as_type_ptr(pattern_map.at(sdpa_scale).get_node_shared_ptr()); + prev_scale_value = prev_scale_node->cast_vector()[0]; + scale_et = prev_scale_node->get_output_element_type(0); + } else { + auto head_size = q_input.get_partial_shape()[3]; + if (head_size.is_dynamic()) + return false; + + prev_scale_value = 1.0f / std::sqrt(static_cast(head_size.get_length())); + } + + // Extract scalar scale values for Q and K if those are constant and set new inputs for SDPA + if (has_q_scale) { + scale_q_node = pattern_map.at(scale_q).get_node_shared_ptr(); + if (ov::is_type(scale_q_node)) { + scale_q_value = ov::as_type_ptr(scale_q_node)->cast_vector()[0]; + q_input = pattern_map.at(q); + } + } + if (has_k_scale) { + scale_k_node = pattern_map.at(scale_k).get_node_shared_ptr(); + if (ov::is_type(scale_k_node)) { + scale_k_value = ov::as_type_ptr(scale_k_node)->cast_vector()[0]; + k_input = pattern_map.at(k); + } + } + + Output new_scale_node; + auto new_scale_val = prev_scale_value * scale_q_value * scale_k_value; + + // If new scale is 1 and we have non-constant scale node for either Q or K, then we can make it a scale of SDPA + if (new_scale_val == 1.0f) { + if (has_q_scale && !ov::is_type(scale_q_node)) { + new_scale_node = pattern_map.at(scale_q); + q_input = pattern_map.at(q); + } else if (has_k_scale && !ov::is_type(scale_k_node)) { + new_scale_node = pattern_map.at(scale_k); + k_input = pattern_map.at(k); + } else { + new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val}); + } + } else { + new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val}); + } + + OutputVector new_inputs = {q_input, k_input, pattern_map.at(v)}; + if 
(pattern_map.find(mask) != pattern_map.end()) { + new_inputs.push_back(pattern_map.at(mask)); + } else { + new_inputs.push_back( + ov::op::v0::Constant::create(new_scale_node.get_element_type(), ov::Shape{}, std::vector{0.0f})); + } + + new_inputs.push_back(new_scale_node); + + auto new_sdpa = sdpa->clone_with_new_inputs(new_inputs); + new_sdpa->set_friendly_name(sdpa->get_friendly_name()); + ov::copy_runtime_info(sdpa, new_sdpa); + ov::replace_node(sdpa, new_sdpa); + + return true; + }; + + auto m = std::make_shared(sdpa, "SDPAScaleFusion"); + this->register_matcher(m, callback); +} + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp new file mode 100644 index 00000000000000..52c10ba5967bd8 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp @@ -0,0 +1,234 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/op/transpose.hpp" + +using namespace testing; +using namespace ov::pass; +using namespace ov; + +TEST_F(TransformationTestsF, SDPAFusionTest1) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest2) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f}); + const 
auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest3) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto casual = false; + { + const auto key_t = + std::make_shared(key, + op::v0::Constant::create(element::i64, Shape{4}, {0, 1, 3, 2})); + const auto qk = std::make_shared(query, key_t, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f}); + const auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest4) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, 32, -1}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + { + const auto qk = std::make_shared(query, key, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + model_ref = model->clone(); + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest5) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + const PartialShape attention_mask_shape{1, 32, -1, -1}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto mask = std::make_shared(element::f16, attention_mask_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto mask_add = std::make_shared(qk, mask); + const auto softmax = std::make_shared(mask_add, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, 
std::vector{1.0f}); + const auto sdpa = + std::make_shared(query, key, value, mask, scale_const, casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest6) { + const PartialShape query_shape{1, 32, 10, 32}; + const PartialShape key_shape{1, 32, 10, 32}; + const PartialShape value_shape{1, 32, 10, 32}; + const PartialShape attention_mask_shape{1, 1, 10, 10}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto mask = std::make_shared(element::f16, attention_mask_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto mask_add = std::make_shared(qk, mask); + const auto softmax = std::make_shared(mask_add, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto sdpa = + std::make_shared(query, key, value, mask, scale_const, casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest7) { + const PartialShape query_shape{1, 8, -1, 32}; + const PartialShape key_shape{-1, 1, 8, 32}; + const PartialShape value_shape{1, 8, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + { + const auto key_t = + std::make_shared(key, + op::v0::Constant::create(element::i64, Shape{4}, {1, 2, 3, 0})); + const auto qk = std::make_shared(query, key_t, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } +} diff --git a/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp new file mode 100644 index 00000000000000..f922f030a9c43b --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp @@ -0,0 +1,228 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" + +using namespace testing; +using namespace ov::pass; +using namespace ov; + +TEST_F(TransformationTestsF, SDPAScaleFusionTest1) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = 
std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_const); + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = + std::make_shared(q_scaled, k_scaled, v_scaled, casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto new_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto new_scale_const = + ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{64.0f / std::sqrt(32.0f)}); + const auto sdpa = std::make_shared(query, + key, + v_scaled, + new_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest2) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{2.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_const); + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = std::make_shared(q_scaled, + k_scaled, + v_scaled, + sdpa_mask_const, + sdpa_scale_const, + casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto new_scale_const = + ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{128.0f}); + const auto sdpa = std::make_shared(query, + key, + v_scaled, + sdpa_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest3) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{2.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto 
+    const auto v_scaled = std::make_shared<ov::op::v1::Multiply>(value, scale_const);
+    const auto casual = false;
+    {
+        const auto q_scaled = std::make_shared<ov::op::v1::Multiply>(query, scale_const);
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   sdpa_scale_const,
+                                                                                   casual);
+
+        model = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value});
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
+    }
+
+    {
+        const auto new_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{16.0f});
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(query,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   new_scale_const,
+                                                                                   casual);
+        model_ref = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value});
+    }
+
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+TEST_F(TransformationTestsF, SDPAScaleFusionTest4) {
+    const PartialShape query_shape{1, 32, -1, 32};
+    const PartialShape key_shape{1, 32, -1, 32};
+    const PartialShape value_shape{1, 32, -1, 32};
+
+    const auto query = std::make_shared<ov::op::v0::Parameter>(element::f32, query_shape);
+    const auto key = std::make_shared<ov::op::v0::Parameter>(element::f32, key_shape);
+    const auto value = std::make_shared<ov::op::v0::Parameter>(element::f32, value_shape);
+    const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{0.0f});
+    const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{2.0f});
+    const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{8.0f});
+    const auto scale_dyn = std::make_shared<ov::op::v0::Parameter>(element::f32, ov::Shape{});
+    const auto v_scaled = std::make_shared<ov::op::v1::Multiply>(value, scale_const);
+    const auto casual = false;
+    const auto q_scaled = std::make_shared<ov::op::v1::Multiply>(query, scale_dyn);
+    {
+        const auto k_scaled = std::make_shared<ov::op::v1::Multiply>(key, scale_const);
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   k_scaled,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   sdpa_scale_const,
+                                                                                   casual);
+
+        model = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
+    }
+
+    {
+        const auto new_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{16.0f});
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   new_scale_const,
+                                                                                   casual);
+        model_ref = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+    }
+
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+TEST_F(TransformationTestsF, SDPAScaleFusionTest5) {
+    const PartialShape query_shape{1, 32, -1, 32};
+    const PartialShape key_shape{1, 32, -1, 32};
+    const PartialShape value_shape{1, 32, -1, 32};
+
+    const auto query = std::make_shared<ov::op::v0::Parameter>(element::f32, query_shape);
+    const auto key = std::make_shared<ov::op::v0::Parameter>(element::f32, key_shape);
+    const auto value = std::make_shared<ov::op::v0::Parameter>(element::f32, value_shape);
+    const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{0.0f});
+    const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{1.0f});
+    const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{1.0f});
+    const auto scale_dyn = std::make_shared<ov::op::v0::Parameter>(element::f32, ov::Shape{});
+    const auto v_scaled = std::make_shared<ov::op::v1::Multiply>(value, scale_const);
+    const auto casual = false;
+    {
+        const auto q_scaled = std::make_shared<ov::op::v1::Multiply>(query, scale_dyn);
+        const auto k_scaled = std::make_shared<ov::op::v1::Multiply>(key, scale_const);
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   k_scaled,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   sdpa_scale_const,
+                                                                                   casual);
+
+        model = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
+    }
+
+    {
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(query,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   scale_dyn,
+                                                                                   casual);
+        model_ref = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+    }
+
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index a63377312ecb95..fb9e0925bc89e2 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -37,6 +37,7 @@
 #include "transformations/common_optimizations/nop_elimination.hpp"
 #include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "transformations/common_optimizations/rms_fusion.hpp"
+#include "transformations/common_optimizations/sdpa_fusion.hpp"
 #include "transformations/common_optimizations/transpose_sinking.hpp"
 #include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
 #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
@@ -695,6 +696,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecisions) {
     CPU_DISABLE_PASS_COMMON(manager, ov::pass::MatMulConstTransposesExtraction);
     CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertScatterNDUpdate15ToScatterNDUpdate3);
     CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertSliceScatter);
+    CPU_DISABLE_PASS_COMMON(manager, ov::pass::SDPAFusion);
 
     CPU_DISABLE_PASS_X64(manager, ov::pass::HSigmoidDecomposition);
     CPU_DISABLE_PASS_X64(manager, ov::pass::ReduceL1Decomposition);
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 53ab9aa188b7aa..7c7c09adcd182f 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -92,6 +92,7 @@
 #include "transformations/common_optimizations/lstm_cell_fusion.hpp"
 #include "transformations/common_optimizations/move_eltwise_up_data_movement.hpp"
 #include "transformations/common_optimizations/mvn_fusion.hpp"
+#include "transformations/common_optimizations/sdpa_scale_fusion.hpp"
 #include "transformations/common_optimizations/softmax_fusion.hpp"
 #include "transformations/common_optimizations/glu_fusion.hpp"
 #include "transformations/common_optimizations/transpose_sinking.hpp"
@@ -941,6 +942,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         if (!disable_horizontal_fc_fusion)
             manager.register_pass();
 
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
         manager.register_pass();
         auto pass_config = manager.get_pass_config();
         manager.register_pass();