From 8ee6f407a9993db0c1d8d9347531bec013d53703 Mon Sep 17 00:00:00 2001 From: Paul Youngsoo Ahn Date: Mon, 23 Dec 2024 17:29:06 +0900 Subject: [PATCH 1/8] [GPU] Implement fake_convert (#28065) ### Details: - *implement fake_convert* - *add functional test for fake convert* ### Tickets: - *159263* --- src/core/include/openvino/op/fake_convert.hpp | 1 + src/core/src/op/fake_convert.cpp | 4 + .../intel_gpu/plugin/primitives_list.hpp | 1 + .../intel_gpu/primitives/fake_convert.hpp | 68 +++++++++ .../intel_gpu/src/graph/fake_convert.cpp | 72 +++++++++ .../src/graph/impls/cpu/fake_convert.cpp | 131 ++++++++++++++++ .../src/graph/impls/cpu/register.cpp | 1 + .../src/graph/impls/cpu/register.hpp | 1 + .../impls/registry/fake_convert_impls.cpp | 24 +++ .../src/graph/impls/registry/registry.hpp | 1 + .../src/graph/include/fake_convert_inst.h | 55 +++++++ .../intel_gpu/src/plugin/ops/fake_convert.cpp | 39 +++++ .../single_layer_tests/fake_convert.cpp | 141 ++++++++++++++++++ .../unit/module_tests/impls_registry_test.cpp | 4 +- 14 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp create mode 100644 src/plugins/intel_gpu/src/graph/fake_convert.cpp create mode 100644 src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp create mode 100644 src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp create mode 100644 src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h create mode 100644 src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp create mode 100644 src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp diff --git a/src/core/include/openvino/op/fake_convert.hpp b/src/core/include/openvino/op/fake_convert.hpp index c3eaa43b98a51b..16ef7a0337c15b 100644 --- a/src/core/include/openvino/op/fake_convert.hpp +++ b/src/core/include/openvino/op/fake_convert.hpp @@ -68,6 +68,7 @@ class OPENVINO_API FakeConvert : public Op { bool has_evaluate() const override; std::string get_destination_type() const; + void set_destination_type(ov::element::Type destination_type); const ov::element::Type& get_destination_element_type() const; private: diff --git a/src/core/src/op/fake_convert.cpp b/src/core/src/op/fake_convert.cpp index 5b3c8f8d8e9938..517674402ef872 100644 --- a/src/core/src/op/fake_convert.cpp +++ b/src/core/src/op/fake_convert.cpp @@ -79,6 +79,10 @@ std::string FakeConvert::get_destination_type() const { return m_destination_type.get_type_name(); } +void FakeConvert::set_destination_type(ov::element::Type destination_type) { + m_destination_type = destination_type; +} + const ov::element::Type& FakeConvert::get_destination_element_type() const { return m_destination_type; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index c7524f1880157d..0950614897ab43 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -267,6 +267,7 @@ REGISTER_FACTORY(v13, ScaledDotProductAttention); REGISTER_FACTORY(v13, BitwiseAnd); REGISTER_FACTORY(v13, BitwiseOr); REGISTER_FACTORY(v13, BitwiseXor); +REGISTER_FACTORY(v13, FakeConvert); // ------------------------------ Supported v15 ops ----------------------------- // REGISTER_FACTORY(v15, ROIAlignRotated); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp 
b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp new file mode 100644 index 00000000000000..c16af0be51abda --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "primitive.hpp" +#include + +namespace cldnn { + +/// @brief FakeConvert performs element-wise quantization of input values +/// into a set of values corresponding to a target low-precision type. +struct fake_convert : public primitive_base { + CLDNN_DECLARE_PRIMITIVE(fake_convert) + + fake_convert() : primitive_base("", {}) {} + + /// @brief Constructs fake_convert primitive. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param scale Scale primitive id. + /// @param shift Shift primitive id. + /// @param destination_type The low precision type to be emulated. + fake_convert(const primitive_id& id, + const input_info& input, + const input_info& scale, + const input_info& shift, + ov::element::Type destination_type = ov::element::Type_t::f8e4m3) + : primitive_base(id, {input, scale, shift}, 1), destination_type(destination_type) {} + + /// @brief Constructs fake_convert primitive. + /// @param id This primitive id. + /// @param input Input primitive id. + /// @param scale Scale primitive id. + /// @param shift Shift primitive id. + /// @param destination_type The low precision type to be emulated. + fake_convert(const primitive_id& id, + const input_info& input, + const input_info& scale, + ov::element::Type destination_type = ov::element::Type_t::f8e4m3) + : primitive_base(id, {input, scale}, 1), destination_type(destination_type) {} + + ov::element::Type destination_type; + + size_t hash() const override { + size_t seed = primitive::hash(); + seed = hash_combine(seed, destination_type.get_type_name()); + return seed; + } + + bool operator==(const primitive& rhs) const override { + if (!compare_common_params(rhs)) + return false; + auto rhs_casted = downcast(rhs); + return (destination_type == rhs_casted.destination_type); + } + + void save(BinaryOutputBuffer& ob) const override { + primitive_base::save(ob); + ob << make_data(&destination_type, sizeof(destination_type)); + } + + void load(BinaryInputBuffer& ib) override { + primitive_base::load(ib); + ib >> make_data(&destination_type, sizeof(destination_type)); + } +}; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/fake_convert.cpp new file mode 100644 index 00000000000000..b201378d52cc8d --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/fake_convert.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fake_convert_inst.h" +#include "fake_convert_shape_inference.hpp" + +#include "primitive_type_base.h" +#include "intel_gpu/runtime/error_handler.hpp" +#include "json_object.h" +#include + +namespace cldnn { +GPU_DEFINE_PRIMITIVE_TYPE_ID(fake_convert) + +layout fake_convert_inst::calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param) { + return calc_output_layouts(node, impl_param)[0]; +} + +template +std::vector fake_convert_inst::calc_output_layouts(fake_convert_node const& node, kernel_impl_params const& impl_param) { + const auto& input_layout = impl_param.get_input_layout(0); + auto output_type = ov::element::Type(input_layout.data_type); + + OPENVINO_ASSERT(ov::element::Type::merge(output_type, 
output_type, ov::element::Type(impl_param.get_input_layout(1).data_type)), + "Mixed input types are not supported."); + + if (impl_param.input_layouts.size() == 3) { + OPENVINO_ASSERT(ov::element::Type::merge(output_type, output_type, ov::element::Type(impl_param.get_input_layout(2).data_type)), + "Mixed input types are not supported."); + } + + switch (output_type) { + case ov::element::bf16: + case ov::element::f16: + case ov::element::f32: + break; + default: + OPENVINO_THROW("The output data type should be a bf16, f16, f32 but got: ", output_type); + } + + return { layout{input_layout.get_partial_shape(), output_type, input_layout.format} }; +} + +template std::vector fake_convert_inst::calc_output_layouts(fake_convert_node const& node, const kernel_impl_params& impl_param); + +std::string fake_convert_inst::to_string(fake_convert_node const& node) { + auto desc = node.get_primitive(); + auto node_info = node.desc_to_json(); + auto& input = node.input(); + auto& scale = node.scale(); + + std::stringstream primitive_description; + + json_composite fake_convert_info; + fake_convert_info.add("input id", input.id()); + fake_convert_info.add("scale id", scale.id()); + if (node.has_shift()) { + fake_convert_info.add("shift id", node.shift().id()); + } + fake_convert_info.add("destination_type", node.get_destination_type().get_type_name()); + + node_info->add("fake_convert info", fake_convert_info); + node_info->dump(primitive_description); + + return primitive_description.str(); +} + +fake_convert_inst::typed_primitive_inst(network& network, fake_convert_node const& node) + : parent(network, node) {} + +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp new file mode 100644 index 00000000000000..a5f94741c40bf5 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "impls/cpu/cpu_impl_helpers.hpp" +#include "register.hpp" +#include "fake_convert_inst.h" +#include "impls/registry/implementation_map.hpp" + +#include "openvino/op/fake_convert.hpp" + +namespace cldnn { +namespace cpu { + +struct fake_convert_impl : public typed_primitive_impl { + using parent = typed_primitive_impl; + using parent::parent; + + ov::element::Type destination_type; + + std::shared_ptr op; + + DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::fake_convert_impl) + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + fake_convert_impl() : parent("fake_convert_cpu_impl") {} + + explicit fake_convert_impl(const fake_convert_node& outer) { + set_node_params(outer); + } + + void set_node_params(const program_node& arg) override { + OPENVINO_ASSERT(arg.is_type(), "[GPU] Incorrect program_node type"); + const auto& node = arg.as(); + destination_type = node.get_destination_type(); + } + + void save(BinaryOutputBuffer& ob) const override { + parent::save(ob); + ob << make_data(&destination_type, sizeof(destination_type)); + } + + void load(BinaryInputBuffer& ib) override { + parent::load(ib); + ib >> make_data(&destination_type, sizeof(destination_type)); + } + + event::ptr execute_impl(const std::vector& events, fake_convert_inst& instance) override { + OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "fake_convert::execute_impl"); + auto& stream = instance.get_network().get_stream(); + + const bool pass_through_events = (stream.get_queue_type() == 
QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl(); + + if (!pass_through_events) { + stream.wait_for_events(events); + } + + auto params = instance.get_impl_params(); + + ov::TensorVector input_host_tensors; + ov::TensorVector output_host_tensors; + + if (!op) { + op = std::make_shared(); + op->set_destination_type(destination_type); + } + + std::vector input_mem_ptrs; + for (size_t i = 0; i < instance.dependencies().size(); i++) + input_mem_ptrs.push_back(instance.dep_memory_ptr(i)); + + auto output_mem_ptr = instance.output_memory_ptr(); + + cldnn::mem_lock output_lock(output_mem_ptr, stream); + + for (size_t i = 0; i < input_mem_ptrs.size(); i++) + input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); + + output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data())); + + OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors), + "[GPU] Couldn't execute fake_convert primitive with id ", instance.id()); + + if (pass_through_events) { + return stream.group_events(events); + } + + return make_output_event(stream, instance.is_output()); + } + + void init_kernels(const kernels_cache& , const kernel_impl_params&) override {} + + void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {} + +public: + static std::unique_ptr create(const fake_convert_node& arg, const kernel_impl_params& impl_param) { + return make_unique(); + } +}; + + +namespace detail { + +attach_fake_convert_impl::attach_fake_convert_impl() { + auto formats = { + format::bfyx, + format::bfzyx, + format::bfwzyx, + format::bfuwzyx, + format::bfvuwzyx, + }; + + auto types = { + data_types::f32, + data_types::f16, + data_types::bf16 + }; + + implementation_map::add(impl_types::cpu, shape_types::static_shape, fake_convert_impl::create, types, formats); + implementation_map::add(impl_types::cpu, shape_types::dynamic_shape, fake_convert_impl::create, types, formats); +} + +} // namespace detail +} // namespace cpu +} // namespace cldnn + +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::fake_convert_impl) +BIND_BINARY_BUFFER_WITH_TYPE(cldnn::fake_convert) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp index 2b0dc5b212158c..e86628444de439 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp @@ -31,6 +31,7 @@ void register_implementations() { REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); + REGISTER_CPU(fake_convert); } } // namespace cpu diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp index cb89eae29d8c56..15cc4b11c077eb 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp @@ -56,6 +56,7 @@ REGISTER_CPU(broadcast); REGISTER_CPU(tile); REGISTER_CPU(select); REGISTER_CPU(reduce); +REGISTER_CPU(fake_convert); #undef REGISTER_CPU diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp new file mode 100644 index 00000000000000..991ab5aa12657a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp @@ -0,0 +1,24 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "registry.hpp" +#include 
"intel_gpu/primitives/fake_convert.hpp" +#include "primitive_inst.h" + +namespace ov { +namespace intel_gpu { + +using namespace cldnn; + +const std::vector>& Registry::get_implementations() { + static const std::vector> impls = { + OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::static_shape) + OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::dynamic_shape) + }; + + return impls; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp index f45d0897f01363..b2778233f41e64 100644 --- a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp @@ -145,6 +145,7 @@ REGISTER_IMPLS(scatter_elements_update); REGISTER_IMPLS(shape_of); REGISTER_IMPLS(strided_slice); REGISTER_IMPLS(tile); +REGISTER_IMPLS(fake_convert); REGISTER_DEFAULT_IMPLS(assign, CPU_S, CPU_D); REGISTER_DEFAULT_IMPLS(read_value, CPU_S, CPU_D); diff --git a/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h b/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h new file mode 100644 index 00000000000000..d86c565a5e6b2e --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h @@ -0,0 +1,55 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include "intel_gpu/primitives/fake_convert.hpp" +#include "primitive_inst.h" + +#include +#include + +namespace cldnn { + +template <> +struct typed_program_node : public typed_program_node_base { + using parent = typed_program_node_base; + typed_program_node(const std::shared_ptr prim, program& prog) + : parent(prim, prog), destination_type(prim->destination_type) { + support_padding_all(true); + } + +public: + using parent::parent; + + program_node& input() const { return get_dependency(0); } + program_node& scale() const { return get_dependency(1); } + program_node& shift() const { return get_dependency(2); } + bool has_shift() const { return (get_dependencies().size() == 3); } + + ov::element::Type get_destination_type() const { return destination_type; } + + std::vector get_shape_infer_dependencies() const override { return {}; } + +private: + ov::element::Type destination_type; +}; + +using fake_convert_node = typed_program_node; + +template <> +class typed_primitive_inst : public typed_primitive_inst_base { + using parent = typed_primitive_inst_base; + using parent::parent; + +public: + template + static std::vector calc_output_layouts(fake_convert_node const& /*node*/, const kernel_impl_params& impl_param); + static layout calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param); + static std::string to_string(fake_convert_node const& node); + + typed_primitive_inst(network& network, fake_convert_node const& node); +}; + +using fake_convert_inst = typed_primitive_inst; +} // namespace cldnn diff --git a/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp new file mode 100644 index 00000000000000..282a483deab189 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/plugin/common_utils.hpp" + +#include "openvino/op/fake_convert.hpp" + +#include "intel_gpu/primitives/fake_convert.hpp" + +namespace ov { +namespace intel_gpu { 
+static void CreateFakeConvertOp(ProgramBuilder& p, const std::shared_ptr& op) { + validate_inputs_count(op, {2, 3}); + const auto inputs = p.GetInputInfo(op); + const std::string layerName = layer_type_name_ID(op); + ov::element::Type destination_type = op->get_destination_element_type(); + std::shared_ptr fake_convert_prim = nullptr; + if (inputs.size() == 2) { + fake_convert_prim = std::make_shared(layerName, + inputs[0], + inputs[1], + destination_type); + } else { + fake_convert_prim = std::make_shared(layerName, + inputs[0], + inputs[1], + inputs[2], + destination_type); + } + + p.add_primitive(*op, fake_convert_prim); +} + +REGISTER_FACTORY_IMPL(v13, FakeConvert); + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp new file mode 100644 index 00000000000000..d1236f5c524421 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp @@ -0,0 +1,141 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "common_test_utils/file_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +#include "openvino/op/parameter.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/fake_convert.hpp" + +namespace { + +namespace fp8 { +constexpr float MAX_F8E4M3 = 448.f; +constexpr float MAX_F8E5M2 = 57344.f; +} // namespace fp8 + +using namespace std; +using namespace ov; +using namespace testing; +using ov::test::InputShape; + +using FakeConvertTestParams = std::tuple< + ov::Shape, // Input shapes + ov::Shape, // Scale shape + ov::Shape, // Shift shape + ov::element::Type, // input precision + ov::element::Type, // destination type + std::string >; // device name + +class FakeConvertTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + ov::Shape input_shape; + ov::Shape scale_shape; + ov::Shape shift_shape; + ov::element::Type prec; + ov::element::Type destination_type; + std::string target_device; + + std::tie(input_shape, scale_shape, shift_shape, prec, destination_type, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + result << ov::test::utils::vec2str(input_shape) << "_"; + result << "scale_shape=" << ov::test::utils::vec2str(scale_shape) << "_"; + result << "shift_shape=" << ov::test::utils::vec2str(shift_shape) << "_"; + result << "input_precision=" << prec << "_"; + result << "destination_type=" << destination_type << "_"; + result << "device_type=" << target_device; + return result.str(); + } + +protected: + ov::Shape input_shape, scale_shape, shift_shape; + ov::element::Type destination_type; + + void SetUp() override { + ov::element::Type prec; + std::tie(input_shape, scale_shape, shift_shape, prec, destination_type, targetDevice) = GetParam(); + const float MAX_FP8 = (destination_type == ov::element::f8e4m3) ? 
fp8::MAX_F8E4M3 : fp8::MAX_F8E5M2; + if (shift_shape.empty()) { + auto data = make_shared(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {MAX_FP8 / (MAX_FP8 / 2.f), + 1.0f, + MAX_FP8 / (MAX_FP8 * 3.5f), + MAX_FP8 / (MAX_FP8 * 4.f)}); + + auto op = make_shared(data, scale, destination_type); + + function = make_shared(OutputVector{op}, ParameterVector{data}); + } else { + auto data = make_shared(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {MAX_FP8 / (MAX_FP8 / 2.f), + 1.0f, + MAX_FP8 / (MAX_FP8 * 3.5f), + MAX_FP8 / (MAX_FP8 * 4.f)}); + auto shift = op::v0::Constant::create(prec, shift_shape, {0.f, 0.f, 0.f, 0.f}); + + auto op = make_shared(data, scale, shift, destination_type); + + function = make_shared(OutputVector{op}, ParameterVector{data}); + } + } + + void generate_inputs(const std::vector& target_shapes) override { + inputs.clear(); + const float MAX_FP8 = (destination_type == ov::element::f8e4m3) ? fp8::MAX_F8E4M3 : fp8::MAX_F8E5M2; + const auto& func_inputs = function->inputs(); + auto& data_input = func_inputs[0]; + ov::Tensor tensor = ov::Tensor(data_input.get_element_type(), target_shapes[0]); + std::vector input_data{MAX_FP8 / 4.f, + MAX_FP8 / 3.f, + MAX_FP8 / 2.f, + MAX_FP8, + MAX_FP8, + MAX_FP8, + MAX_FP8 * 1.2f, + MAX_FP8 * 2.3f, + MAX_FP8 * 3.4f, + MAX_FP8 * 2.f, + MAX_FP8 * 3.f, + MAX_FP8 * 4.f}; + auto* data_ptr = tensor.data(); + for (size_t i = 0; i < input_data.size(); i++) { + data_ptr[i] = input_data[i]; + } + inputs.insert({data_input.get_node_shared_ptr(), tensor}); + } +}; + +TEST_P(FakeConvertTest, Inference) { + run(); +} + +const std::vector input_precisions = {ov::element::f32}; + +const std::vector input_shapes = {{4, 3}}; + +const ov::Shape scale_shape = {4, 1}; +const std::vector shift_shapes = {{4, 1}, {}}; +const std::vector destination_types = {ov::element::f8e4m3, ov::element::f8e5m2}; + +INSTANTIATE_TEST_SUITE_P(Smoke_FakeConvertTest, + FakeConvertTest, + ::testing::Combine(::testing::ValuesIn(input_shapes), + ::testing::Values(scale_shape), + ::testing::ValuesIn(shift_shapes), + ::testing::ValuesIn(input_precisions), + ::testing::ValuesIn(destination_types), + ::testing::Values(ov::test::utils::DEVICE_GPU)), + FakeConvertTest::getTestCaseName); +} // namespace diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp index a16cd20846a1c7..5dfc450e43905a 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp @@ -85,6 +85,7 @@ #include "intel_gpu/primitives/swiglu.hpp" #include "intel_gpu/primitives/tile.hpp" #include "intel_gpu/primitives/unique.hpp" +#include "intel_gpu/primitives/fake_convert.hpp" #include "primitive_inst.h" #include "test_utils.h" @@ -226,5 +227,6 @@ TEST(registry_test, no_null_impls) { cldnn::unique_count, cldnn::unique_gather, cldnn::scaled_dot_product_attention, - cldnn::rope>(); + cldnn::rope, + cldnn::fake_convert>(); } From 5fc16c8bf3a0693d6aafd1b5ce7a2bf050db1a36 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 23 Dec 2024 14:33:39 +0400 Subject: [PATCH 2/8] [PT FE][DOCS] Document conversion of PyTorch models from disk (#28175) **Details:** Document conversion of PyTorch models from disk **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- .../convert-model-pytorch.rst | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git 
a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst index 6ac806daf0cda0..62cfdf05f2b11f 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-pytorch.rst @@ -203,6 +203,52 @@ Here is an example of how to convert a model obtained with ``torch.export``: This is an experimental feature. Use it only if you know that you need to. PyTorch version 2.2 is recommended. Dynamic shapes are not supported yet. +Converting a PyTorch Model from Disk +#################################### + +PyTorch provides the capability to save models in two distinct formats: ``torch.jit.ScriptModule`` and ``torch.export.ExportedProgram``. +Both formats can be saved to disk as standalone files, enabling them to be reloaded independently of the original Python code. + +ExportedProgram Format +++++++++++++++++++++++ + +The ``ExportedProgram`` format is saved on disk using `torch.export.save() `__. +Below is an example of how to convert an ``ExportedProgram`` from disk: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: py + :force: + + import openvino as ov + ov_model = ov.convert_model('exported_program.pt2') + + .. tab-item:: CLI + :sync: cli + + .. code-block:: sh + + ovc exported_program.pt2 + +ScriptModule Format ++++++++++++++++++++ + +`torch.jit.save() `__ serializes ``ScriptModule`` object on disk. +To convert the serialized ``ScriptModule`` format, run ``convert_model`` function with ``example_input`` parameter as follows: + +.. code-block:: py + :force: + + from openvino import convert_model + import torch + + convert_model(input_model='script_module.pt', example_input=torch.rand(1, 10)) + +``example_input`` is the required parameter for the conversion because ``torch.jit.ScriptModule`` object is always saved in an untraced state on disk. + Exporting a PyTorch Model to ONNX Format ######################################## From b0ff7090a305f94d6ec86f7b60d1833d0dc87be5 Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Mon, 23 Dec 2024 19:12:34 +0800 Subject: [PATCH 3/8] fix coverity scan issue 1568450 (#28139) ### Details: - *fix below coverity scan issue* *** CID 1568450: Concurrent data access violations (MISSING_LOCK) /openvino/src/inference/src/os/lin/lin_system_conf.cpp: 225 in ov::CPU::CPU()::[lambda() (instance 2)]::operator ()() const() 219 return -1; 220 } else if (valid_cpu_mapping_table.size() == (unsigned)_processors) { 221 return 0; 222 } else { 223 _processors = valid_cpu_mapping_table.size(); 224 _cpu_mapping_table.swap(valid_cpu_mapping_table); >>> CID 1568450: Concurrent data access violations (MISSING_LOCK) >>> Accessing "this->this->_proc_type_table" without holding lock "ov::CPU._cpu_mutex". Elsewhere, "ov::CPU._proc_type_table" is written to with "CPU._cpu_mutex" held 2 out of 3 times. 
225 update_valid_processor_linux(std::move(phy_core_list), 226 _numa_nodes, 227 _cores, 228 _proc_type_table, 229 _cpu_mapping_table); 230 return 0; ### Tickets: - *CID 1568450* --- src/inference/src/os/lin/lin_system_conf.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index f8bd16173b8fce..9b6247c6691814 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -188,6 +188,7 @@ CPU::CPU() { } else if (valid_cpu_mapping_table.size() == (unsigned)_processors) { return 0; } else { + std::lock_guard lock{_cpu_mutex}; _processors = valid_cpu_mapping_table.size(); _cpu_mapping_table.swap(valid_cpu_mapping_table); update_valid_processor_linux(std::move(phy_core_list), From 80115574aeebd79e2bec4050b702076c33deee23 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Mon, 23 Dec 2024 15:40:14 +0000 Subject: [PATCH 4/8] [NPUW] Extend NPUW_DQ to work with NF4 for CW models (#28125) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp index 5abe4b39fd44f2..0260fc9718c444 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/opt.cpp @@ -160,7 +160,8 @@ DQMatMulCWi::DQMatMulCWi(Context::Ref ctx) { auto qcoeff_shape = matched_node_qcoeff->output(0).get_shape(); if ((ov::element::i4 == matched_qweight->get_element_type() || - ov::element::i8 == matched_qweight->get_element_type()) && + ov::element::i8 == matched_qweight->get_element_type() || + ov::element::nf4 == matched_qweight->get_element_type()) && (ov::op::util::is_parameter(matched_node_qcoeff) || ov::op::util::is_constant(matched_node_qcoeff)) && qcoeff_shape[1] == 1 && !matched_matmul->get_transpose_a() && matched_matmul->get_transpose_b()) { auto matched_node_cvtw = node_to_output.at(qcvtw).get_node_shared_ptr(); From ae1fbbe52aa8177ae3799a49bb8066729445a6fd Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 23 Dec 2024 21:21:17 +0400 Subject: [PATCH 5/8] [GHA][JAX FE] Separate JAX layer tests to special job and have dedicated req file (#28178) **Details:** Separate JAX layer tests to special job and have dedicated req file **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- .github/workflows/job_jax_layer_tests.yml | 133 ++++++++++++++++++ .github/workflows/job_jax_models_tests.yml | 2 +- .github/workflows/job_python_unit_tests.yml | 8 -- .github/workflows/linux_arm64.yml | 10 ++ .github/workflows/mac.yml | 9 ++ .github/workflows/mac_arm64.yml | 9 ++ .github/workflows/ubuntu_22.yml | 10 ++ .github/workflows/ubuntu_24.yml | 10 ++ .github/workflows/windows_vs2019_release.yml | 9 ++ tests/CMakeLists.txt | 2 +- .../test_tf_UnaryOpsAllRealDomain.py | 2 +- tests/model_hub_tests/jax/requirements.txt | 10 -- tests/requirements_jax | 13 ++ tests/requirements_tensorflow | 2 +- 14 files changed, 207 insertions(+), 22 deletions(-) create mode 100644 .github/workflows/job_jax_layer_tests.yml delete mode 100644 tests/model_hub_tests/jax/requirements.txt create mode 100644 tests/requirements_jax diff --git a/.github/workflows/job_jax_layer_tests.yml b/.github/workflows/job_jax_layer_tests.yml new file mode 100644 index 
00000000000000..25f171060f43be --- /dev/null +++ b/.github/workflows/job_jax_layer_tests.yml @@ -0,0 +1,133 @@ +name: JAX Layer Tests + +on: + workflow_call: + inputs: + runner: + description: 'Machine on which the tests would run' + type: string + required: true + container: + description: 'JSON to be converted to the value of the "container" configuration for the job' + type: string + required: false + default: '{"image": null}' + affected-components: + description: 'Components that are affected by changes in the commit defined by the Smart CI Action' + type: string + required: true + python-version: + description: 'Python version to setup. E.g., "3.11"' + type: string + required: true + +permissions: read-all + +env: + PIP_CACHE_PATH_LINUX: /mount/caches/pip/linux + PIP_CACHE_PATH_WIN: "C:\\mount\\caches\\pip\\win" + +jobs: + JAX_Layer_Tests: + name: JAX Layer Tests + timeout-minutes: 40 + runs-on: ${{ inputs.runner }} + container: ${{ fromJSON(inputs.container) }} + defaults: + run: + shell: ${{ contains(inputs.runner, 'win') && 'pwsh' || 'bash' }} + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + OPENVINO_REPO: ${{ github.workspace }}/openvino + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels + LAYER_TESTS_INSTALL_DIR: ${{ github.workspace }}/install/tests/layer_tests + steps: + - name: Download OpenVINO artifacts (tarballs) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[tests]* + path: ${{ env.INSTALL_DIR }} + merge-multiple: true + + - name: Download OpenVINO artifacts (wheels) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_[wheels]* + path: ${{ env.INSTALL_WHEELS_DIR }} + merge-multiple: true + + # Needed as ${{ github.workspace }} is not working correctly when using Docker + - name: Setup Variables + if: runner.os != 'Windows' + run: | + echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" + echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" + echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" + echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + + - name: Extract OpenVINO packages (Linux, macOS) + if: runner.os != 'Windows' + run: | + pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Extract OpenVINO artifacts (Windows) + if: runner.os == 'Windows' + run: | + Expand-Archive openvino_tests.zip -DestinationPath ${{ env.INSTALL_DIR }} + working-directory: ${{ env.INSTALL_DIR }} + + - name: Fetch setup_python and install wheels actions + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + timeout-minutes: 15 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml + sparse-checkout-cone-mode: false + path: 'openvino' + + - name: Setup Python ${{ inputs.python-version }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ inputs.python-version }} + pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH_LINUX || env.PIP_CACHE_PATH_WIN }} + should-setup-pip-paths: ${{ 
runner.os != 'macOS' }} + self-hosted-runner: ${{ runner.os != 'macOS' }} + + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' + + - name: Install JAX Layer tests dependencies + run: | + # jax test requirements + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax + + - name: JAX Layer Tests + if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287, 142196 + run: python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/jax_tests ${PARALLEL} -m precommit_jax_fe --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-jax.xml + env: + TEST_DEVICE: CPU + TEST_PRECISION: FP16 + JAX_TRACE_MODE: JAXPR + PARALLEL: ${{ runner.os == 'Windows' && ' ' || '-n logical'}} + + - name: Upload Test Results + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + if: ${{ !cancelled() }} + with: + name: test-results-python-jax-layers + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.html + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'warn' diff --git a/.github/workflows/job_jax_models_tests.yml b/.github/workflows/job_jax_models_tests.yml index 07155db1016057..57eb07a83aa423 100644 --- a/.github/workflows/job_jax_models_tests.yml +++ b/.github/workflows/job_jax_models_tests.yml @@ -89,7 +89,7 @@ jobs: - name: Install JAX tests requirements for precommit run: | - python3 -m pip install -r ${MODEL_HUB_TESTS_INSTALL_DIR}/jax/requirements.txt + python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/requirements_jax - name: JAX/Flax Models Tests from Hugging Face if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly' }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index b04f719c8e296f..e1532d530ff2db 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -162,14 +162,6 @@ jobs: export LD_LIBRARY_PATH=${PIP_INSTALL_PATH}/openvino/libs:$LD_LIBRARY_PATH python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/py_frontend_tests --junitxml=${INSTALL_TEST_DIR}/TEST-test_py_fontend.xml - - name: JAX Layer Tests - JAX FE - if: ${{ fromJSON(inputs.affected-components).JAX_FE.test && runner.arch != 'ARM64' && runner.os != 'macOS' }} - run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/jax_tests/ -m precommit_jax_fe --junitxml=${INSTALL_TEST_DIR}/TEST-jax_fe.xml - env: - TEST_DEVICE: CPU - TEST_PRECISION: FP16 - JAX_TRACE_MODE: JAXPR - - name: TensorFlow Lite Layer Tests - TFL FE if: fromJSON(inputs.affected-components).TFL_FE.test run: python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/tensorflow_lite_tests/ -n logical --junitxml=${INSTALL_TEST_DIR}/TEST-tfl_fe.xml diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 66e825e5d5e126..ca1ca6e056e23d 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -202,6 +202,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Docker, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-16-cores-32gb-arm' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + 
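For local runs, a rough shell equivalent of this job's test step is sketched below. The repository-relative paths (`tests/requirements_jax`, `tests/layer_tests/jax_tests`) are assumptions inferred from the install layout above; the environment variables, pytest marker, and `-n logical` flag come directly from the workflow.

```sh
# Sketch only: mirror the JAX Layer Tests job on a local checkout.
# Assumes an OpenVINO wheel is already installed in the active environment.
python3 -m pip install -r tests/requirements_jax            # pinned JAX/Flax test deps
export TEST_DEVICE=CPU TEST_PRECISION=FP16 JAX_TRACE_MODE=JAXPR
python3 -m pytest tests/layer_tests/jax_tests -n logical -m precommit_jax_fe
```
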
CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 5e4335b8151c02..0fbc20cf19594b 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -356,6 +356,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'macos-13' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests # if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 855d76973cc2e4..b60daefa442c83 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -355,6 +355,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'macos-13-xlarge' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index 5aed74bbb242b8..e5c7d25003de1e 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -334,6 +334,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CPU_Functional_Tests: name: CPU functional tests if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index 25be095e692d35..beac15bfbda97d 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -156,6 +156,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + JAX_Layer_Tests: + name: JAX Layer Tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.12' + TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/windows_vs2019_release.yml b/.github/workflows/windows_vs2019_release.yml index f1fd0be596baa2..de33f2603d7430 100644 --- a/.github/workflows/windows_vs2019_release.yml +++ b/.github/workflows/windows_vs2019_release.yml @@ -499,6 +499,15 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + JAX_Layer_Tests: + name: JAX Layer 
Tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_jax_layer_tests.yml + with: + runner: 'aks-win-8-cores-16gb' + affected-components: ${{ needs.smart_ci.outputs.affected_components }} + python-version: '3.11' + CXX_Unit_Tests: name: C++ unit tests needs: [ Build, Smart_CI ] diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 08b4308479ef03..de3ad80280d603 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,5 +7,5 @@ add_subdirectory(model_hub_tests) add_subdirectory(samples_tests) add_subdirectory(e2e_tests) -install(FILES requirements_pytorch requirements_tensorflow requirements_onnx +install(FILES requirements_pytorch requirements_tensorflow requirements_onnx requirements_jax DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py index 4ff4d589cbae32..5c1037e38cfc84 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_UnaryOpsAllRealDomain.py @@ -67,4 +67,4 @@ def test_unary_ops(self, input_shape, input_type, op_type, pytest.skip("159585: accuracy error on ARM") self._test(*self.create_unary_net(input_shape, input_type, op_type), ie_device, precision, ir_version, temp_dir=temp_dir, - use_legacy_frontend=use_legacy_frontend, custom_eps=1e-3) + use_legacy_frontend=use_legacy_frontend, custom_eps=3 * 1e-3) diff --git a/tests/model_hub_tests/jax/requirements.txt b/tests/model_hub_tests/jax/requirements.txt deleted file mode 100644 index 328084ac050ca6..00000000000000 --- a/tests/model_hub_tests/jax/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ --c ../../constraints.txt -numpy -pytest -pytest-html -transformers -requests -jax -jaxlib -flax -pillow \ No newline at end of file diff --git a/tests/requirements_jax b/tests/requirements_jax new file mode 100644 index 00000000000000..c392df4359bee3 --- /dev/null +++ b/tests/requirements_jax @@ -0,0 +1,13 @@ +numpy==1.26.4; python_version < "3.12" or platform_system == "Darwin" and platform_machine == "x86_64" +numpy==2.2.1; python_version >= "3.12" and (platform_system != "Darwin" or platform_machine != "x86_64") +pytest==7.0.1 +pytest-xdist[psutil]==3.6.1 +pytest-html==4.1.1 +jax==0.4.38; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9" +# tensorflow 2.16.2 depends on ml-dtypes~=0.3.1 and jax 0.4.35 depends on ml-dtypes>=0.4.0 +jax==0.4.33; (platform_system == "Darwin" and platform_machine == "x86_64") and python_version > "3.9" +jax==0.4.30; python_version <= "3.9" +flax==0.10.2 +transformers==4.47.1 +defusedxml +pillow diff --git a/tests/requirements_tensorflow b/tests/requirements_tensorflow index 5369b0135f7618..8e0d1141695ef9 100644 --- a/tests/requirements_tensorflow +++ b/tests/requirements_tensorflow @@ -17,7 +17,7 @@ wrapt==1.15.0; python_version >= "3.12" # tensorflow-text is not available for both Windows and ARM platforms tensorflow-text==2.18.0; python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64" tensorflow-hub==0.16.1 -jax==0.4.35; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9" +jax==0.4.38; (platform_system != "Darwin" or platform_machine != "x86_64") and python_version > "3.9" # tensorflow 2.16.2 depends on ml-dtypes~=0.3.1 and jax 0.4.35 depends on ml-dtypes>=0.4.0 jax==0.4.33; (platform_system == "Darwin" and platform_machine == "x86_64") and 
python_version > "3.9" jax==0.4.30; python_version <= "3.9" From 92edc910c54e8b322dc75558c3e7fed0738e9797 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Tue, 24 Dec 2024 10:07:24 +0900 Subject: [PATCH 6/8] [GPU] Fix ConvolutionKernel_b_fs_yx_fsv16_1x1 to support input0 feature dynamic case (#28156) ### Details: - Fix ConvolutionKernel_b_fs_yx_fsv16_1x1 to support input0 feature dynamic case ### Tickets: - 146681 --- .../cl_kernels/convolution_gpu_bfyx_f16_1x1.cl | 5 ++--- .../convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp | 2 ++ .../tests/unit/test_cases/convolution_gpu_test.cpp | 9 ++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl index 542fa69ebc241b..109fa2de9841aa 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_f16_1x1.cl @@ -122,8 +122,8 @@ KERNEL(convolution_b_fs_yx_fsv16_1x1)( { #endif // SLM_DIV_FACTOR > 1 vec_t src = 0; -#if INPUT_LEFTOVERS - if ((k + 1) * FEATURE_SLICE_SIZE >= INPUT0_FEATURE_NUM) + + if (INPUT_LEFTOVERS && ((k + 1) * FEATURE_SLICE_SIZE >= INPUT0_FEATURE_NUM)) { if (k * FEATURE_SLICE_SIZE + sglid < INPUT0_FEATURE_NUM) { @@ -143,7 +143,6 @@ KERNEL(convolution_b_fs_yx_fsv16_1x1)( } } else -#endif // INPUT_LEFTOVERS { #if PADDED_INPUT #if X_BLOCK_SIZE > 1 diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp index 6fd074f8d8506d..7150d51ecf1e48 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_yx_fsv16_1x1.cpp @@ -264,6 +264,8 @@ JitConstants ConvolutionKernel_b_fs_yx_fsv16_1x1::GetJitConstants(const convolut } if (params.inputs[0].Feature().v % tuning_data.feature_block_size != 0) { jit.AddConstant(MakeJitConstant("INPUT_LEFTOVERS", 1)); + } else { + jit.AddConstant(MakeJitConstant("INPUT_LEFTOVERS", 0)); } } else { DimensionAccessHelperJit input0_dims(params.inputs[0]); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index f0243f055c3670..13934020bfdf66 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -10820,7 +10820,14 @@ TEST_P(conv_dyn_test, convolution_gpu_fsv16_1x1_no_bias) { return outputs_ref.at("conv").get_memory(); }; - auto in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(p.in_shape[1]), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + cldnn::layout in_layout; + if (p.in_shape[2] % 2 == 0) { + // input feature is static + in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(p.in_shape[1]), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + } else { + // input feature is dynamic + in_layout = layout{ov::PartialShape{ov::Dimension(), ov::Dimension(), ov::Dimension(), ov::Dimension()}, data_types::f16, format::b_fs_yx_fsv16}; + } auto input = engine.allocate_memory({ p.in_shape, data_types::f16, 
format::b_fs_yx_fsv16 }); auto weights = engine.allocate_memory({p.wei_shape, data_types::f16, is_grouped ? format::bfzyx : format::bfyx}); From f62b94f0cd924ba9414b892dd270248059ff16ba Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Tue, 24 Dec 2024 11:09:29 +0800 Subject: [PATCH 7/8] support offline CPU in Linux (#28149) ### Details: - *support offline CPU in Linux* - *Ignore SOC Ecore of MTL* - *enable Ecore of LNL* - *similar PR of [PR 27870](https://github.com/openvinotoolkit/openvino/pull/27870) which is reverted* ### Tickets: - *CVS-154222, CVS-159641* - *[issues-26889](https://github.com/openvinotoolkit/openvino/issues/26889)* --- src/inference/src/os/lin/lin_system_conf.cpp | 366 ++++++++++-------- .../cpu_map_parser/cache_parser_linux.cpp | 245 ++++++++++++ .../unit/cpu_map_parser/freq_parser_linux.cpp | 183 +++++++++ 3 files changed, 642 insertions(+), 152 deletions(-) diff --git a/src/inference/src/os/lin/lin_system_conf.cpp b/src/inference/src/os/lin/lin_system_conf.cpp index 9b6247c6691814..64da4cb0ac836a 100644 --- a/src/inference/src/os/lin/lin_system_conf.cpp +++ b/src/inference/src/os/lin/lin_system_conf.cpp @@ -23,76 +23,107 @@ CPU::CPU() { std::vector> system_info_table; std::vector node_info_table; - auto get_cache_info_linux = [&]() { + constexpr int cache_info_mode = 1; + constexpr int freq_info_mode = 2; + + auto get_info_linux = [&](int mode) { int cpu_index = 0; - int cache_index = 0; - int cache_files = 3; + int file_index = 0; + int max_files = 3; - std::vector one_info(cache_files); + std::string one_info; - while (1) { - for (int n = 0; n < cache_files; n++) { - cache_index = (n == 0) ? n : n + 1; - - std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + "/cache/index" + - std::to_string(cache_index) + "/shared_cpu_list"); - if (!cache_file.is_open()) { - cache_index = -1; - break; - } - std::string cache_info; - std::getline(cache_file, cache_info); - one_info[n] = std::move(cache_info); - } + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + std::string sub_str; - if (cache_index == -1) { - if (cpu_index == 0) { - return -1; - } else { - return 0; - } - } else { - system_info_table.push_back(one_info); - cpu_index++; - } + int core_1; + int core_2; + + system_info_table.clear(); + + std::ifstream possible_file("/sys/devices/system/cpu/possible"); + std::string possible_info; + + if (possible_file.is_open()) { + std::getline(possible_file, possible_info); + } else { + return -1; } - return 0; - }; + if ((endpos = possible_info.find('-', pos)) != std::string::npos) { + sub_str = possible_info.substr(pos, endpos - pos); + core_1 = std::stoi(sub_str); + sub_str = possible_info.substr(endpos + 1); + core_2 = std::stoi(sub_str); + system_info_table.resize(core_2 + 1, std::vector(max_files, "")); + } else { + return -1; + } - auto get_freq_info_linux = [&]() { - int cpu_index = 0; - int cache_index = 0; + std::ifstream online_file("/sys/devices/system/cpu/online"); + std::string online_info; - std::vector file_name = {"/topology/core_cpus_list", - "/topology/physical_package_id", - "/cpufreq/cpuinfo_max_freq"}; - int num_of_files = file_name.size(); - std::vector one_info(num_of_files); + if (online_file.is_open()) { + std::getline(online_file, online_info); + } else { + system_info_table.clear(); + return -1; + } while (1) { - for (int n = 0; n < num_of_files; n++) { - cache_index = n; + if ((endpos = online_info.find('-', pos)) != std::string::npos) { + sub_str = online_info.substr(pos, endpos - pos); + core_1 
= std::stoi(sub_str); + sub_str = online_info.substr(endpos + 1); + core_2 = std::stoi(sub_str); - std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + file_name[n]); - if (!cache_file.is_open()) { - cache_index = -1; - break; + for (cpu_index = core_1; cpu_index <= core_2; cpu_index++) { + if (mode == cache_info_mode) { + for (int n = 0; n < max_files; n++) { + file_index = (n == 0) ? n : n + 1; + one_info.clear(); + + std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + + "/cache/index" + std::to_string(file_index) + "/shared_cpu_list"); + if (cache_file.is_open()) { + std::getline(cache_file, one_info); + } else { + if ((cpu_index == core_1) && (n == 0)) { + system_info_table.clear(); + return -1; + } + } + system_info_table[cpu_index][n] = std::move(one_info); + } + } else { + std::vector file_name = {"/topology/core_cpus_list", + "/topology/physical_package_id", + "/cpufreq/cpuinfo_max_freq"}; + + for (int n = 0; n < max_files; n++) { + one_info.clear(); + + std::ifstream cache_file("/sys/devices/system/cpu/cpu" + std::to_string(cpu_index) + + file_name[n]); + if (cache_file.is_open()) { + std::getline(cache_file, one_info); + } else { + if ((cpu_index == core_1) && (n == 2)) { + system_info_table.clear(); + return -1; + } + } + system_info_table[cpu_index][n] = std::move(one_info); + } + } } - std::string cache_info; - std::getline(cache_file, cache_info); - one_info[n] = std::move(cache_info); } - if (cache_index == -1) { - if (cpu_index == 0) { - return -1; - } else { - return 0; - } + if ((pos = online_info.find(',', endpos)) != std::string::npos) { + pos++; } else { - system_info_table.push_back(one_info); - cpu_index++; + break; } } @@ -202,7 +233,7 @@ CPU::CPU() { get_node_info_linux(); - if (!get_cache_info_linux()) { + if (!get_info_linux(cache_info_mode)) { parse_cache_info_linux(system_info_table, node_info_table, _processors, @@ -216,7 +247,7 @@ CPU::CPU() { if ((_proc_type_table.size() == 0) || ((_proc_type_table[0][MAIN_CORE_PROC] == 0) && (_proc_type_table[0][ALL_PROC] > 0) && (_proc_type_table[0][ALL_PROC] != _proc_type_table[0][EFFICIENT_CORE_PROC]))) { - if (!get_freq_info_linux()) { + if (!get_info_linux(freq_info_mode)) { parse_freq_info_linux(system_info_table, node_info_table, _processors, @@ -472,56 +503,73 @@ void parse_cache_info_linux(const std::vector> system_i const std::vector line_value_0({0, 0, 0, 0, -1, -1}); - for (int n = 0; n < _processors; n++) { - if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { - std::string::size_type pos = 0; - std::string::size_type endpos = 0; - std::string sub_str; - - int core_1; - int core_2; + std::vector offline_list; + int info_index = 0; - if (0 == _sockets) { - _proc_type_table.push_back(line_value_0); - } else { - _proc_type_table.push_back(_proc_type_table[0]); - _proc_type_table[0] = line_value_0; - } - - while (1) { - if ((endpos = system_info_table[n][2].find('-', pos)) != std::string::npos) { - sub_str = system_info_table[n][2].substr(pos, endpos - pos); - core_1 = std::stoi(sub_str); - sub_str = system_info_table[n][2].substr(endpos + 1); - core_2 = std::stoi(sub_str); + for (int n = 0; n < _processors; n++) { + if ((system_info_table[n][2].size() > 0) || (system_info_table[n][1].size() > 0)) { + info_index = system_info_table[n][2].size() > 0 ? 
2 : 1; + if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { + std::string::size_type pos = 0; + std::string::size_type endpos = 0; + std::string sub_str; + + int core_1; + int core_2; + + if (0 == _sockets) { + _proc_type_table.push_back(line_value_0); + } else { + _proc_type_table.push_back(_proc_type_table[0]); + _proc_type_table[0] = line_value_0; + } - for (int m = core_1; m <= core_2; m++) { - _cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets; - _cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID]; - update_proc_map_info(m); + while (1) { + if ((endpos = system_info_table[n][info_index].find('-', pos)) != std::string::npos) { + sub_str = system_info_table[n][info_index].substr(pos, endpos - pos); + core_1 = std::stoi(sub_str); + sub_str = system_info_table[n][info_index].substr(endpos + 1); + core_2 = std::stoi(sub_str); + + if ((info_index == 1) && (core_2 - core_1 == 1)) { + offline_list.push_back(n); + break; + } + for (int m = core_1; m <= core_2; m++) { + _cpu_mapping_table[m][CPU_MAP_SOCKET_ID] = _sockets; + _cpu_mapping_table[m][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[m][CPU_MAP_SOCKET_ID]; + update_proc_map_info(m); + if (_processors == 0) { + return; + }; + } + } else if (pos != std::string::npos) { + sub_str = system_info_table[n][info_index].substr(pos); + core_1 = std::stoi(sub_str); + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets; + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + update_proc_map_info(core_1); if (_processors == 0) { return; }; + endpos = pos; } - } else if (pos != std::string::npos) { - sub_str = system_info_table[n][2].substr(pos); - core_1 = std::stoi(sub_str); - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = _sockets; - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - update_proc_map_info(core_1); - if (_processors == 0) { - return; - }; - endpos = pos; - } - if ((pos = system_info_table[n][2].find(',', endpos)) != std::string::npos) { - pos++; - } else { - break; + if ((pos = system_info_table[n][2].find(',', endpos)) != std::string::npos) { + pos++; + } else { + break; + } + } + _sockets++; + if (_proc_type_table[0][ALL_PROC] == 0) { + _proc_type_table.erase(_proc_type_table.begin()); + _sockets--; } } - _sockets++; + } else { + offline_list.push_back(n); } } @@ -541,6 +589,11 @@ void parse_cache_info_linux(const std::vector> system_i _numa_nodes = node_info_table.size(); parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table); } + + for (size_t n = 0; n < offline_list.size(); n++) { + _cpu_mapping_table.erase(_cpu_mapping_table.begin() + offline_list[n] - n); + _processors--; + } }; void get_cpu_mapping_from_cores(const int _processors, @@ -616,7 +669,6 @@ void parse_freq_info_linux(const std::vector> system_in std::vector>& _cpu_mapping_table) { int freq_max = 0; bool ecore_enabled = false; - bool ht_enabled = false; _processors = system_info_table.size(); _numa_nodes = 0; @@ -626,6 +678,8 @@ void parse_freq_info_linux(const std::vector> system_in std::vector line_value_0(PROC_TYPE_TABLE_SIZE, 0); + std::vector offline_list; + auto clean_up_output = [&]() { _processors = 0; _cores = 0; @@ -637,65 +691,68 @@ void parse_freq_info_linux(const std::vector> system_in }; for (int n = 0; n < _processors; n++) { - if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { - std::string::size_type pos = 0; - std::string::size_type endpos1 = 0; - 
std::string::size_type endpos2 = 0; - std::string sub_str; - - int core_1 = 0; - int core_2 = 0; - - if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) || - ((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) { - endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2; - sub_str = system_info_table[n][0].substr(pos, endpos1 - pos); - core_1 = std::stoi(sub_str); - sub_str = system_info_table[n][0].substr(endpos1 + 1); - core_2 = std::stoi(sub_str); - if ((core_1 != n) && (core_2 != n)) { - clean_up_output(); - return; - } - - _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC; - _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + if (system_info_table[n][2].size() > 0) { + if (-1 == _cpu_mapping_table[n][CPU_MAP_SOCKET_ID]) { + std::string::size_type pos = 0; + std::string::size_type endpos1 = 0; + std::string::size_type endpos2 = 0; + std::string sub_str; + + int core_1 = 0; + int core_2 = 0; + + if (((endpos1 = system_info_table[n][0].find(',', pos)) != std::string::npos) || + ((endpos2 = system_info_table[n][0].find('-', pos)) != std::string::npos)) { + endpos1 = (endpos1 != std::string::npos) ? endpos1 : endpos2; + sub_str = system_info_table[n][0].substr(pos, endpos1 - pos); + core_1 = std::stoi(sub_str); + sub_str = system_info_table[n][0].substr(endpos1 + 1); + core_2 = std::stoi(sub_str); + if ((core_1 != n) && (core_2 != n)) { + clean_up_output(); + return; + } - _cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2; - _cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID]; - _cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; - _cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID]; + _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = HYPER_THREADING_PROC; + _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + + _cpu_mapping_table[core_2][CPU_MAP_PROCESSOR_ID] = core_2; + _cpu_mapping_table[core_2][CPU_MAP_SOCKET_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_2][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_2][CPU_MAP_CORE_ID] = _cpu_mapping_table[core_1][CPU_MAP_CORE_ID]; + _cpu_mapping_table[core_2][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; + _cpu_mapping_table[core_2][CPU_MAP_GROUP_ID] = _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID]; + + int core_freq = std::stoi(system_info_table[core_1][2]); + freq_max = std::max(core_freq, freq_max); + } else if (system_info_table[n][0].size() > 0) { + core_1 = std::stoi(system_info_table[n][0]); - ht_enabled = true; - int core_freq = std::stoi(system_info_table[core_1][2]); - freq_max = 
std::max(core_freq, freq_max); - } else if (system_info_table[n][0].size() > 0) { - core_1 = std::stoi(system_info_table[n][0]); + _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; + _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); + _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; + _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; - _cpu_mapping_table[core_1][CPU_MAP_PROCESSOR_ID] = core_1; - _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID] = std::stoi(system_info_table[core_1][1]); - _cpu_mapping_table[core_1][CPU_MAP_NUMA_NODE_ID] = _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]; - _cpu_mapping_table[core_1][CPU_MAP_CORE_ID] = _cores; + int core_freq = std::stoi(system_info_table[core_1][2]); + if ((0 == freq_max) || (core_freq >= freq_max * 0.97)) { + freq_max = std::max(core_freq, freq_max); + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; + } else { + _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC; + ecore_enabled = true; + } - int core_freq = std::stoi(system_info_table[core_1][2]); - if (((0 == freq_max) || (core_freq >= freq_max * 0.95)) && (!ht_enabled)) { - freq_max = std::max(core_freq, freq_max); - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = MAIN_CORE_PROC; - } else { - _cpu_mapping_table[core_1][CPU_MAP_CORE_TYPE] = EFFICIENT_CORE_PROC; - ecore_enabled = true; + _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; } - - _cpu_mapping_table[core_1][CPU_MAP_GROUP_ID] = _cores; + _sockets = std::max(_sockets, _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]); + _cores++; } - _sockets = std::max(_sockets, _cpu_mapping_table[core_1][CPU_MAP_SOCKET_ID]); - _cores++; + } else { + offline_list.push_back(n); } } @@ -734,6 +791,11 @@ void parse_freq_info_linux(const std::vector> system_in _numa_nodes = node_info_table.size(); parse_node_info_linux(node_info_table, _numa_nodes, _sockets, _proc_type_table, _cpu_mapping_table); } + + for (size_t n = 0; n < offline_list.size(); n++) { + _cpu_mapping_table.erase(_cpu_mapping_table.begin() + offline_list[n] - n); + _processors--; + } }; void update_valid_processor_linux(const std::vector phy_core_list, diff --git a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp index 8679090b9ae491..9ea43bd0604296 100644 --- a/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp +++ b/src/inference/tests/unit/cpu_map_parser/cache_parser_linux.cpp @@ -385,6 +385,188 @@ LinuxCpuMapTestCase cache_1sockets_96cores = { {"0-95"}, }, }; +LinuxCpuMapTestCase cache_2sockets_56cores_hyperthreading = { + 110, + 2, + 2, + 56, + {{110, 56, 0, 54, -1, -1}, {54, 28, 0, 26, 0, 0}, {56, 28, 0, 28, 1, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1}, + {11, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {12, 0, 0, 11, HYPER_THREADING_PROC, 11, -1}, + {13, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {14, 0, 0, 13, HYPER_THREADING_PROC, 13, -1}, + {15, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {16, 0, 0, 15, HYPER_THREADING_PROC, 15, -1}, + {17, 0, 0, 16, 
HYPER_THREADING_PROC, 16, -1}, {18, 0, 0, 17, HYPER_THREADING_PROC, 17, -1}, + {19, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {21, 0, 0, 19, HYPER_THREADING_PROC, 19, -1}, + {22, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {23, 0, 0, 21, HYPER_THREADING_PROC, 21, -1}, + {24, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {25, 0, 0, 23, HYPER_THREADING_PROC, 23, -1}, + {26, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {27, 0, 0, 25, HYPER_THREADING_PROC, 25, -1}, + {28, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {29, 1, 1, 29, HYPER_THREADING_PROC, 29, -1}, + {30, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {31, 1, 1, 31, HYPER_THREADING_PROC, 31, -1}, + {32, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {33, 1, 1, 33, HYPER_THREADING_PROC, 33, -1}, + {34, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {35, 1, 1, 35, HYPER_THREADING_PROC, 35, -1}, + {36, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {37, 1, 1, 37, HYPER_THREADING_PROC, 37, -1}, + {38, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {39, 1, 1, 39, HYPER_THREADING_PROC, 39, -1}, + {40, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {41, 1, 1, 41, HYPER_THREADING_PROC, 41, -1}, + {42, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {43, 1, 1, 43, HYPER_THREADING_PROC, 43, -1}, + {44, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {45, 1, 1, 45, HYPER_THREADING_PROC, 45, -1}, + {46, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {47, 1, 1, 47, HYPER_THREADING_PROC, 47, -1}, + {48, 1, 1, 48, HYPER_THREADING_PROC, 48, -1}, {49, 1, 1, 49, HYPER_THREADING_PROC, 49, -1}, + {50, 1, 1, 50, HYPER_THREADING_PROC, 50, -1}, {51, 1, 1, 51, HYPER_THREADING_PROC, 51, -1}, + {52, 1, 1, 52, HYPER_THREADING_PROC, 52, -1}, {53, 1, 1, 53, HYPER_THREADING_PROC, 53, -1}, + {54, 1, 1, 54, HYPER_THREADING_PROC, 54, -1}, {55, 1, 1, 55, HYPER_THREADING_PROC, 55, -1}, + {56, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {57, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {58, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {59, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {60, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {61, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {62, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {63, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {64, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {65, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {66, 0, 0, 26, MAIN_CORE_PROC, 26, -1}, {67, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, + {68, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, {69, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, + {70, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, {71, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, + {72, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, {73, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, + {74, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, {75, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, + {76, 0, 0, 27, MAIN_CORE_PROC, 27, -1}, {77, 0, 0, 19, MAIN_CORE_PROC, 19, -1}, + {78, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {79, 0, 0, 21, MAIN_CORE_PROC, 21, -1}, + {80, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {81, 0, 0, 23, MAIN_CORE_PROC, 23, -1}, + {82, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {83, 0, 0, 25, MAIN_CORE_PROC, 25, -1}, + {84, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {85, 1, 1, 29, MAIN_CORE_PROC, 29, -1}, + {86, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {87, 1, 1, 31, MAIN_CORE_PROC, 31, -1}, + {88, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {89, 1, 1, 33, MAIN_CORE_PROC, 33, -1}, + {90, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {91, 1, 1, 35, MAIN_CORE_PROC, 35, -1}, + {92, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {93, 1, 1, 37, MAIN_CORE_PROC, 37, -1}, + {94, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {95, 1, 1, 39, MAIN_CORE_PROC, 39, -1}, + {96, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {97, 1, 1, 41, MAIN_CORE_PROC, 41, -1}, + {98, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {99, 1, 1, 43, MAIN_CORE_PROC, 43, -1}, + 
{100, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {101, 1, 1, 45, MAIN_CORE_PROC, 45, -1}, + {102, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {103, 1, 1, 47, MAIN_CORE_PROC, 47, -1}, + {104, 1, 1, 48, MAIN_CORE_PROC, 48, -1}, {105, 1, 1, 49, MAIN_CORE_PROC, 49, -1}, + {106, 1, 1, 50, MAIN_CORE_PROC, 50, -1}, {107, 1, 1, 51, MAIN_CORE_PROC, 51, -1}, + {108, 1, 1, 52, MAIN_CORE_PROC, 52, -1}, {109, 1, 1, 53, MAIN_CORE_PROC, 53, -1}, + {110, 1, 1, 54, MAIN_CORE_PROC, 54, -1}, {111, 1, 1, 55, MAIN_CORE_PROC, 55, -1}, + }, + { + {"0,56", "0,56", "0-9,11-19,21-27,56-83"}, + {"1,57", "1,57", "0-9,11-19,21-27,56-83"}, + {"2,58", "2,58", "0-9,11-19,21-27,56-83"}, + {"3,59", "3,59", "0-9,11-19,21-27,56-83"}, + {"4,60", "4,60", "0-9,11-19,21-27,56-83"}, + {"5,61", "5,61", "0-9,11-19,21-27,56-83"}, + {"6,62", "6,62", "0-9,11-19,21-27,56-83"}, + {"7,63", "7,63", "0-9,11-19,21-27,56-83"}, + {"8,64", "8,64", "0-9,11-19,21-27,56-83"}, + {"9,65", "9,65", "0-9,11-19,21-27,56-83"}, + {"", "", ""}, + {"11,67", "11,67", "0-9,11-19,21-27,56-83"}, + {"12,68", "12,68", "0-9,11-19,21-27,56-83"}, + {"13,69", "13,69", "0-9,11-19,21-27,56-83"}, + {"14,70", "14,70", "0-9,11-19,21-27,56-83"}, + {"15,71", "15,71", "0-9,11-19,21-27,56-83"}, + {"16,72", "16,72", "0-9,11-19,21-27,56-83"}, + {"17,73", "17,73", "0-9,11-19,21-27,56-83"}, + {"18,74", "18,74", "0-9,11-19,21-27,56-83"}, + {"19,75", "19,75", "0-9,11-19,21-27,56-83"}, + {"", "", ""}, + {"21,77", "21,77", "0-9,11-19,21-27,56-83"}, + {"22,78", "22,78", "0-9,11-19,21-27,56-83"}, + {"23,79", "23,79", "0-9,11-19,21-27,56-83"}, + {"24,80", "24,80", "0-9,11-19,21-27,56-83"}, + {"25,81", "25,81", "0-9,11-19,21-27,56-83"}, + {"26,82", "26,82", "0-9,11-19,21-27,56-83"}, + {"27,83", "27,83", "0-9,11-19,21-27,56-83"}, + {"28,84", "28,84", "28-55,84-111"}, + {"29,85", "29,85", "28-55,84-111"}, + {"30,86", "30,86", "28-55,84-111"}, + {"31,87", "31,87", "28-55,84-111"}, + {"32,88", "32,88", "28-55,84-111"}, + {"33,89", "33,89", "28-55,84-111"}, + {"34,90", "34,90", "28-55,84-111"}, + {"35,91", "35,91", "28-55,84-111"}, + {"36,92", "36,92", "28-55,84-111"}, + {"37,93", "37,93", "28-55,84-111"}, + {"38,94", "38,94", "28-55,84-111"}, + {"39,95", "39,95", "28-55,84-111"}, + {"40,96", "40,96", "28-55,84-111"}, + {"41,97", "41,97", "28-55,84-111"}, + {"42,98", "42,98", "28-55,84-111"}, + {"43,99", "43,99", "28-55,84-111"}, + {"44,100", "44,100", "28-55,84-111"}, + {"45,101", "45,101", "28-55,84-111"}, + {"46,102", "46,102", "28-55,84-111"}, + {"47,103", "47,103", "28-55,84-111"}, + {"48,104", "48,104", "28-55,84-111"}, + {"49,105", "49,105", "28-55,84-111"}, + {"50,106", "50,106", "28-55,84-111"}, + {"51,107", "51,107", "28-55,84-111"}, + {"52,108", "52,108", "28-55,84-111"}, + {"53,109", "53,109", "28-55,84-111"}, + {"54,110", "54,110", "28-55,84-111"}, + {"55,111", "55,111", "28-55,84-111"}, + {"0,56", "0,56", "0-9,11-19,21-27,56-83"}, + {"1,57", "1,57", "0-9,11-19,21-27,56-83"}, + {"2,58", "2,58", "0-9,11-19,21-27,56-83"}, + {"3,59", "3,59", "0-9,11-19,21-27,56-83"}, + {"4,60", "4,60", "0-9,11-19,21-27,56-83"}, + {"5,61", "5,61", "0-9,11-19,21-27,56-83"}, + {"6,62", "6,62", "0-9,11-19,21-27,56-83"}, + {"7,63", "7,63", "0-9,11-19,21-27,56-83"}, + {"8,64", "8,64", "0-9,11-19,21-27,56-83"}, + {"9,65", "9,65", "0-9,11-19,21-27,56-83"}, + {"66", "66", "0-9,11-19,21-27,56-83"}, + {"11,67", "11,67", "0-9,11-19,21-27,56-83"}, + {"12,68", "12,68", "0-9,11-19,21-27,56-83"}, + {"13,69", "13,69", "0-9,11-19,21-27,56-83"}, + {"14,70", "14,70", "0-9,11-19,21-27,56-83"}, + {"15,71", "15,71", 
"0-9,11-19,21-27,56-83"}, + {"16,72", "16,72", "0-9,11-19,21-27,56-83"}, + {"17,73", "17,73", "0-9,11-19,21-27,56-83"}, + {"18,74", "18,74", "0-9,11-19,21-27,56-83"}, + {"19,75", "19,75", "0-9,11-19,21-27,56-83"}, + {"76", "76", "0-9,11-19,21-27,56-83"}, + {"21,77", "21,77", "0-9,11-19,21-27,56-83"}, + {"22,78", "22,78", "0-9,11-19,21-27,56-83"}, + {"23,79", "23,79", "0-9,11-19,21-27,56-83"}, + {"24,80", "24,80", "0-9,11-19,21-27,56-83"}, + {"25,81", "25,81", "0-9,11-19,21-27,56-83"}, + {"26,82", "26,82", "0-9,11-19,21-27,56-83"}, + {"27,83", "27,83", "0-9,11-19,21-27,56-83"}, + {"28,84", "28,84", "28-55,84-111"}, + {"29,85", "29,85", "28-55,84-111"}, + {"30,86", "30,86", "28-55,84-111"}, + {"31,87", "31,87", "28-55,84-111"}, + {"32,88", "32,88", "28-55,84-111"}, + {"33,89", "33,89", "28-55,84-111"}, + {"34,90", "34,90", "28-55,84-111"}, + {"35,91", "35,91", "28-55,84-111"}, + {"36,92", "36,92", "28-55,84-111"}, + {"37,93", "37,93", "28-55,84-111"}, + {"38,94", "38,94", "28-55,84-111"}, + {"39,95", "39,95", "28-55,84-111"}, + {"40,96", "40,96", "28-55,84-111"}, + {"41,97", "41,97", "28-55,84-111"}, + {"42,98", "42,98", "28-55,84-111"}, + {"43,99", "43,99", "28-55,84-111"}, + {"44,100", "44,100", "28-55,84-111"}, + {"45,101", "45,101", "28-55,84-111"}, + {"46,102", "46,102", "28-55,84-111"}, + {"47,103", "47,103", "28-55,84-111"}, + {"48,104", "48,104", "28-55,84-111"}, + {"49,105", "49,105", "28-55,84-111"}, + {"50,106", "50,106", "28-55,84-111"}, + {"51,107", "51,107", "28-55,84-111"}, + {"52,108", "52,108", "28-55,84-111"}, + {"53,109", "53,109", "28-55,84-111"}, + {"54,110", "54,110", "28-55,84-111"}, + {"55,111", "55,111", "28-55,84-111"}, + }, + { + {"0-9,11-19,21-27,56-83"}, + {"28-55,84-111"}, + }, +}; LinuxCpuMapTestCase cache_2sockets_48cores_hyperthreading = { 96, 2, @@ -1005,6 +1187,36 @@ LinuxCpuMapTestCase cache_2sockets_20cores_hyperthreading_1 = { }, {}, }; +LinuxCpuMapTestCase cache_1sockets_16cores_hyperthreading = { + 20, + 1, + 1, + 14, + {{20, 6, 8, 6, 0, 0}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, {3, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, + {4, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {5, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {12, 0, 0, 6, EFFICIENT_CORE_PROC, 6, -1}, {13, 0, 0, 7, EFFICIENT_CORE_PROC, 6, -1}, + {14, 0, 0, 8, EFFICIENT_CORE_PROC, 6, -1}, {15, 0, 0, 9, EFFICIENT_CORE_PROC, 6, -1}, + {16, 0, 0, 10, EFFICIENT_CORE_PROC, 7, -1}, {17, 0, 0, 11, EFFICIENT_CORE_PROC, 7, -1}, + {18, 0, 0, 12, EFFICIENT_CORE_PROC, 7, -1}, {19, 0, 0, 13, EFFICIENT_CORE_PROC, 7, -1}, + }, + { + {"0,5", "0,5", "0-19"}, {"1-2", "1-2", "0-19"}, {"1-2", "1-2", "0-19"}, {"3-4", "3-4", "0-19"}, + {"3-4", "3-4", "0-19"}, {"0,5", "0,5", "0-19"}, {"6-7", "6-7", "0-19"}, {"6-7", "6-7", "0-19"}, + {"8-9", "8-9", "0-19"}, {"8-9", "8-9", "0-19"}, {"10-11", "10-11", "0-19"}, {"10-11", "10-11", "0-19"}, + {"12", "12-15", "0-19"}, {"13", "12-15", "0-19"}, {"14", "12-15", "0-19"}, {"15", "12-15", "0-19"}, + {"16", "16-19", "0-19"}, {"17", "16-19", "0-19"}, {"18", "16-19", "0-19"}, {"19", "16-19", "0-19"}, + {"20", "20-21", ""}, {"21", "20-21", ""}, + }, + { + {"0-21"}, + }, +}; LinuxCpuMapTestCase cache_1sockets_14cores_hyperthreading = { 20, 1, @@ -1135,6 +1347,36 @@ 
LinuxCpuMapTestCase cache_1sockets_8cores_hyperthreading = { }, {{"0-11"}}, }; +LinuxCpuMapTestCase cache_1sockets_8cores_hyperthreading_1 = { + 8, + 1, + 1, + 8, + {{8, 4, 4, 0, 0, 0}}, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, + {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {4, 0, 0, 4, EFFICIENT_CORE_PROC, 4, -1}, + {5, 0, 0, 5, EFFICIENT_CORE_PROC, 4, -1}, + {6, 0, 0, 6, EFFICIENT_CORE_PROC, 4, -1}, + {7, 0, 0, 7, EFFICIENT_CORE_PROC, 4, -1}, + }, + { + {"0", "0", "0-3"}, + {"1", "1", "0-3"}, + {"2", "2", "0-3"}, + {"3", "3", "0-3"}, + {"4", "4-7", ""}, + {"5", "4-7", ""}, + {"6", "4-7", ""}, + {"7", "4-7", ""}, + }, + { + {"0-7"}, + }, +}; LinuxCpuMapTestCase cache_1sockets_6cores_hyperthreading = { 12, 1, @@ -1220,6 +1462,7 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, LinuxCpuMapCacheParserTests, testing::Values(cache_2sockets_104cores_hyperthreading, cache_1sockets_96cores, + cache_2sockets_56cores_hyperthreading, cache_2sockets_48cores_hyperthreading, cache_2sockets_48cores_hyperthreading_1, cache_2sockets_24cores_hyperthreading, @@ -1229,10 +1472,12 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, cache_2sockets_48cores_2, cache_2sockets_20cores_hyperthreading, cache_2sockets_20cores_hyperthreading_1, + cache_1sockets_16cores_hyperthreading, cache_1sockets_14cores_hyperthreading, cache_1sockets_14cores_hyperthreading_1, cache_1sockets_10cores_hyperthreading, cache_1sockets_8cores_hyperthreading, + cache_1sockets_8cores_hyperthreading_1, cache_1sockets_6cores_hyperthreading, cache_1sockets_4cores, cache_VM_cache_0)); diff --git a/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp b/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp index 04ab617961b953..8ccdfad011d19c 100644 --- a/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp +++ b/src/inference/tests/unit/cpu_map_parser/freq_parser_linux.cpp @@ -258,6 +258,188 @@ LinuxCpuMapTestCase freq_2sockets_112cores_hyperthreading = { }, // param[in]: The CPU frequency information table of this simulated platform {{"0-55,112-167"}, {"56-111,168-223"}}, // param[in]: The numa node information table of this simulated platform }; +LinuxCpuMapTestCase freq_2sockets_56cores_hyperthreading = { + 110, + 2, + 2, + 56, + {{110, 56, 0, 54, -1, -1}, {54, 28, 0, 26, 0, 0}, {56, 28, 0, 28, 1, 1}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {2, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {3, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {4, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {5, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {6, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {7, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {8, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {9, 0, 0, 9, HYPER_THREADING_PROC, 9, -1}, + {11, 0, 0, 10, HYPER_THREADING_PROC, 10, -1}, {12, 0, 0, 11, HYPER_THREADING_PROC, 11, -1}, + {13, 0, 0, 12, HYPER_THREADING_PROC, 12, -1}, {14, 0, 0, 13, HYPER_THREADING_PROC, 13, -1}, + {15, 0, 0, 14, HYPER_THREADING_PROC, 14, -1}, {16, 0, 0, 15, HYPER_THREADING_PROC, 15, -1}, + {17, 0, 0, 16, HYPER_THREADING_PROC, 16, -1}, {18, 0, 0, 17, HYPER_THREADING_PROC, 17, -1}, + {19, 0, 0, 18, HYPER_THREADING_PROC, 18, -1}, {21, 0, 0, 19, HYPER_THREADING_PROC, 19, -1}, + {22, 0, 0, 20, HYPER_THREADING_PROC, 20, -1}, {23, 0, 0, 21, HYPER_THREADING_PROC, 21, -1}, + {24, 0, 0, 22, HYPER_THREADING_PROC, 22, -1}, {25, 0, 0, 23, HYPER_THREADING_PROC, 23, -1}, + {26, 0, 0, 24, HYPER_THREADING_PROC, 24, -1}, {27, 0, 0, 25, HYPER_THREADING_PROC, 25, 
-1}, + {28, 1, 1, 26, HYPER_THREADING_PROC, 26, -1}, {29, 1, 1, 27, HYPER_THREADING_PROC, 27, -1}, + {30, 1, 1, 28, HYPER_THREADING_PROC, 28, -1}, {31, 1, 1, 29, HYPER_THREADING_PROC, 29, -1}, + {32, 1, 1, 30, HYPER_THREADING_PROC, 30, -1}, {33, 1, 1, 31, HYPER_THREADING_PROC, 31, -1}, + {34, 1, 1, 32, HYPER_THREADING_PROC, 32, -1}, {35, 1, 1, 33, HYPER_THREADING_PROC, 33, -1}, + {36, 1, 1, 34, HYPER_THREADING_PROC, 34, -1}, {37, 1, 1, 35, HYPER_THREADING_PROC, 35, -1}, + {38, 1, 1, 36, HYPER_THREADING_PROC, 36, -1}, {39, 1, 1, 37, HYPER_THREADING_PROC, 37, -1}, + {40, 1, 1, 38, HYPER_THREADING_PROC, 38, -1}, {41, 1, 1, 39, HYPER_THREADING_PROC, 39, -1}, + {42, 1, 1, 40, HYPER_THREADING_PROC, 40, -1}, {43, 1, 1, 41, HYPER_THREADING_PROC, 41, -1}, + {44, 1, 1, 42, HYPER_THREADING_PROC, 42, -1}, {45, 1, 1, 43, HYPER_THREADING_PROC, 43, -1}, + {46, 1, 1, 44, HYPER_THREADING_PROC, 44, -1}, {47, 1, 1, 45, HYPER_THREADING_PROC, 45, -1}, + {48, 1, 1, 46, HYPER_THREADING_PROC, 46, -1}, {49, 1, 1, 47, HYPER_THREADING_PROC, 47, -1}, + {50, 1, 1, 48, HYPER_THREADING_PROC, 48, -1}, {51, 1, 1, 49, HYPER_THREADING_PROC, 49, -1}, + {52, 1, 1, 50, HYPER_THREADING_PROC, 50, -1}, {53, 1, 1, 51, HYPER_THREADING_PROC, 51, -1}, + {54, 1, 1, 52, HYPER_THREADING_PROC, 52, -1}, {55, 1, 1, 53, HYPER_THREADING_PROC, 53, -1}, + {56, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {57, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {58, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {59, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {60, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {61, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {62, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {63, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {64, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {65, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {66, 0, 0, 54, MAIN_CORE_PROC, 54, -1}, {67, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, + {68, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, {69, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, + {70, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, {71, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, + {72, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, {73, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, + {74, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, {75, 0, 0, 18, MAIN_CORE_PROC, 18, -1}, + {76, 0, 0, 55, MAIN_CORE_PROC, 55, -1}, {77, 0, 0, 19, MAIN_CORE_PROC, 19, -1}, + {78, 0, 0, 20, MAIN_CORE_PROC, 20, -1}, {79, 0, 0, 21, MAIN_CORE_PROC, 21, -1}, + {80, 0, 0, 22, MAIN_CORE_PROC, 22, -1}, {81, 0, 0, 23, MAIN_CORE_PROC, 23, -1}, + {82, 0, 0, 24, MAIN_CORE_PROC, 24, -1}, {83, 0, 0, 25, MAIN_CORE_PROC, 25, -1}, + {84, 1, 1, 26, MAIN_CORE_PROC, 26, -1}, {85, 1, 1, 27, MAIN_CORE_PROC, 27, -1}, + {86, 1, 1, 28, MAIN_CORE_PROC, 28, -1}, {87, 1, 1, 29, MAIN_CORE_PROC, 29, -1}, + {88, 1, 1, 30, MAIN_CORE_PROC, 30, -1}, {89, 1, 1, 31, MAIN_CORE_PROC, 31, -1}, + {90, 1, 1, 32, MAIN_CORE_PROC, 32, -1}, {91, 1, 1, 33, MAIN_CORE_PROC, 33, -1}, + {92, 1, 1, 34, MAIN_CORE_PROC, 34, -1}, {93, 1, 1, 35, MAIN_CORE_PROC, 35, -1}, + {94, 1, 1, 36, MAIN_CORE_PROC, 36, -1}, {95, 1, 1, 37, MAIN_CORE_PROC, 37, -1}, + {96, 1, 1, 38, MAIN_CORE_PROC, 38, -1}, {97, 1, 1, 39, MAIN_CORE_PROC, 39, -1}, + {98, 1, 1, 40, MAIN_CORE_PROC, 40, -1}, {99, 1, 1, 41, MAIN_CORE_PROC, 41, -1}, + {100, 1, 1, 42, MAIN_CORE_PROC, 42, -1}, {101, 1, 1, 43, MAIN_CORE_PROC, 43, -1}, + {102, 1, 1, 44, MAIN_CORE_PROC, 44, -1}, {103, 1, 1, 45, MAIN_CORE_PROC, 45, -1}, + {104, 1, 1, 46, MAIN_CORE_PROC, 46, -1}, {105, 1, 1, 47, MAIN_CORE_PROC, 47, -1}, + {106, 1, 1, 48, MAIN_CORE_PROC, 48, -1}, {107, 1, 1, 49, MAIN_CORE_PROC, 49, -1}, + {108, 1, 1, 50, MAIN_CORE_PROC, 50, -1}, {109, 1, 1, 51, MAIN_CORE_PROC, 51, -1}, + {110, 1, 1, 52, 
MAIN_CORE_PROC, 52, -1}, {111, 1, 1, 53, MAIN_CORE_PROC, 53, -1}, + }, + { + {"0,56", "0", "3500000"}, + {"1,57", "0", "3500000"}, + {"2,58", "0", "3500000"}, + {"3,59", "0", "3500000"}, + {"4,60", "0", "3500000"}, + {"5,61", "0", "3500000"}, + {"6,62", "0", "3500000"}, + {"7,63", "0", "3500000"}, + {"8,64", "0", "3500000"}, + {"9,65", "0", "3500000"}, + {"", "", ""}, + {"11,67", "0", "3500000"}, + {"12,68", "0", "3500000"}, + {"13,69", "0", "3500000"}, + {"14,70", "0", "3500000"}, + {"15,71", "0", "3500000"}, + {"16,72", "0", "3500000"}, + {"17,73", "0", "3500000"}, + {"18,74", "0", "3500000"}, + {"19,75", "0", "3500000"}, + {"", "", ""}, + {"21,77", "0", "3500000"}, + {"22,78", "0", "3500000"}, + {"23,79", "0", "3500000"}, + {"24,80", "0", "3500000"}, + {"25,81", "0", "3500000"}, + {"26,82", "0", "3500000"}, + {"27,83", "0", "3500000"}, + {"28,84", "1", "3500000"}, + {"29,85", "1", "3500000"}, + {"30,86", "1", "3500000"}, + {"31,87", "1", "3500000"}, + {"32,88", "1", "3500000"}, + {"33,89", "1", "3500000"}, + {"34,90", "1", "3500000"}, + {"35,91", "1", "3500000"}, + {"36,92", "1", "3500000"}, + {"37,93", "1", "3500000"}, + {"38,94", "1", "3500000"}, + {"39,95", "1", "3500000"}, + {"40,96", "1", "3500000"}, + {"41,97", "1", "3500000"}, + {"42,98", "1", "3500000"}, + {"43,99", "1", "3500000"}, + {"44,100", "1", "3500000"}, + {"45,101", "1", "3500000"}, + {"46,102", "1", "3500000"}, + {"47,103", "1", "3500000"}, + {"48,104", "1", "3500000"}, + {"49,105", "1", "3500000"}, + {"50,106", "1", "3500000"}, + {"51,107", "1", "3500000"}, + {"52,108", "1", "3500000"}, + {"53,109", "1", "3500000"}, + {"54,110", "1", "3500000"}, + {"55,111", "1", "3500000"}, + {"0,56", "0", "3500000"}, + {"1,57", "0", "3500000"}, + {"2,58", "0", "3500000"}, + {"3,59", "0", "3500000"}, + {"4,60", "0", "3500000"}, + {"5,61", "0", "3500000"}, + {"6,62", "0", "3500000"}, + {"7,63", "0", "3500000"}, + {"8,64", "0", "3500000"}, + {"9,65", "0", "3500000"}, + {"66", "0", "3500000"}, + {"11,67", "0", "3500000"}, + {"12,68", "0", "3500000"}, + {"13,69", "0", "3500000"}, + {"14,70", "0", "3500000"}, + {"15,71", "0", "3500000"}, + {"16,72", "0", "3500000"}, + {"17,73", "0", "3500000"}, + {"18,74", "0", "3500000"}, + {"19,75", "0", "3500000"}, + {"76", "0", "3500000"}, + {"21,77", "0", "3500000"}, + {"22,78", "0", "3500000"}, + {"23,79", "0", "3500000"}, + {"24,80", "0", "3500000"}, + {"25,81", "0", "3500000"}, + {"26,82", "0", "3500000"}, + {"27,83", "0", "3500000"}, + {"28,84", "1", "3500000"}, + {"29,85", "1", "3500000"}, + {"30,86", "1", "3500000"}, + {"31,87", "1", "3500000"}, + {"32,88", "1", "3500000"}, + {"33,89", "1", "3500000"}, + {"34,90", "1", "3500000"}, + {"35,91", "1", "3500000"}, + {"36,92", "1", "3500000"}, + {"37,93", "1", "3500000"}, + {"38,94", "1", "3500000"}, + {"39,95", "1", "3500000"}, + {"40,96", "1", "3500000"}, + {"41,97", "1", "3500000"}, + {"42,98", "1", "3500000"}, + {"43,99", "1", "3500000"}, + {"44,100", "1", "3500000"}, + {"45,101", "1", "3500000"}, + {"46,102", "1", "3500000"}, + {"47,103", "1", "3500000"}, + {"48,104", "1", "3500000"}, + {"49,105", "1", "3500000"}, + {"50,106", "1", "3500000"}, + {"51,107", "1", "3500000"}, + {"52,108", "1", "3500000"}, + {"53,109", "1", "3500000"}, + {"54,110", "1", "3500000"}, + {"55,111", "1", "3500000"}, + }, + { + {"0-9,11-19,21-27,56-83"}, + {"28-55,84-111"}, + }, +}; LinuxCpuMapTestCase freq_2sockets_48cores_hyperthreading = { 96, 2, @@ -987,6 +1169,7 @@ TEST_P(LinuxCpuMapFreqParserTests, LinuxFreq) {} INSTANTIATE_TEST_SUITE_P(CPUMap, 
LinuxCpuMapFreqParserTests,
                         testing::Values(freq_2sockets_112cores_hyperthreading,
+                                        freq_2sockets_56cores_hyperthreading,
                                         freq_2sockets_48cores_hyperthreading,
                                         freq_2sockets_48cores_hyperthreading_1,
                                         freq_2sockets_24cores_hyperthreading,

From b4c81e0f39e9cd30752879e4db32487dadba7db0 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Tue, 24 Dec 2024 09:02:27 +0400
Subject: [PATCH 8/8] [TRANSFORMATIONS][GPU] SDPA Fusion passes (#28042)

### Details:
 - Added a basic SDPA fusion pass and fusion of QK scaling into SDPA (T5 case)

---------

Signed-off-by: Vladimir Paramuzov
---
 .../common_optimizations/sdpa_fusion.hpp      |  60 +++++
 .../sdpa_scale_fusion.hpp                     |  58 +++++
 .../moc_transformations.cpp                   |   2 +
 .../common_optimizations/sdpa_fusion.cpp      | 127 ++++++++++
 .../sdpa_scale_fusion.cpp                     | 140 +++++++++++
 .../common_optimizations/sdpa_fusion_test.cpp | 234 ++++++++++++++++++
 .../sdpa_scale_fusion_test.cpp                | 228 +++++++++++++++++
 .../transformation_pipeline.cpp               |   2 +
 .../src/plugin/transformations_pipeline.cpp   |   2 +
 9 files changed, 853 insertions(+)
 create mode 100644 src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
 create mode 100644 src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp
 create mode 100644 src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp
 create mode 100644 src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp
 create mode 100644 src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp
 create mode 100644 src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp

diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
new file mode 100644
index 00000000000000..84383b777604ea
--- /dev/null
+++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_fusion.hpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+/// This pass transforms the following sub-graph to a single Scaled Dot Product Attention operation.
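// Illustrative sketch (mirrors SDPAFusionTest1 later in this patch; not part
// of the upstream header): the matched sub-graph, built with the public C++
// API, looks like this:
//
//   auto qk      = std::make_shared<ov::op::v0::MatMul>(query, key, false, true);
//   auto softmax = std::make_shared<ov::op::v8::Softmax>(qk, -1);
//   auto qkv     = std::make_shared<ov::op::v0::MatMul>(softmax, value, false, false);
//
// After SDPAFusion runs, the three nodes collapse into a single
// ov::op::v13::ScaledDotProductAttention whose scale input is the constant
// 1.0f and whose attention mask is a zero constant when no Add-mask matched.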
+/// Before: +/// ┌───────┐ ┌───────┐ ┌───────┐ +/// │ Q │ │ K │ │ V │ +/// └───┬───┘ └───┬───┘ └───┬───┘ +/// │ │ │ +/// │ │ │ +/// ┌───┴───┐ ┌─────┴──────┐ │ +/// │ MatMul│<──│ Transpose │ │ +/// └───┬───┘ | (Optional) │ │ +/// │ └────────────┘ │ +/// ┌───┴───┐ ┌─────────────┐ │ +/// │ Add │<───│AttentionMask│ │ +/// └───┬───┘ | (Optional) │ │ +/// │ └─────────────┘ │ +/// ┌───┴───┐ │ +/// │Softmax│ │ +/// └───┬───┘ │ +/// │ │ +/// ┌───┴───┐ │ +/// │ MatMul│<─────────────────────┘ +/// └───┬───┘ +/// ┌───┴───┐ +/// │ Output│ +/// └───────┘ +/// +/// After: +/// ┌───────┐ ┌───────┐ ┌───────┐ ┌─────────────┐ +/// │ Q │ │ K │ │ V │ │AttentionMask│ +/// └───┬───┘ └───┬───┘ └───┬───┘ └──────┬──────┘ +/// │ │ │ │ +/// │ │ │ │ +/// ┌───┴────────────┴────────────┴───────────────┴─┐ +/// │ ScaledDotProductAttention │ +/// └────────────────────┬──────────────────────────┘ +/// │ +/// │ +/// ┌────┴────┐ +/// │ Output │ +/// └─────────┘ +class TRANSFORMATIONS_API SDPAFusion : public ov::pass::MatcherPass { +public: + OPENVINO_MATCHER_PASS_RTTI("SDPAFusion", "0"); + SDPAFusion(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp new file mode 100644 index 00000000000000..cae0363e785f4e --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/sdpa_scale_fusion.hpp @@ -0,0 +1,58 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +/// Merges explicit multiplication by scalar value for Q and K into scale attribute of SDPA op +/// Before: +/// ┌───────┐ ┌───────┐ ┌───────┐ ┌─────────────┐ ┌─────────────┐ +/// │ Q │ │ K │ │ V │ │AttentionMask│ │ Scale | +/// └───┬───┘ └───┬───┘ └───┬───┘ │ (Optional) │ │ (Optional) │ +/// │ │ │ └──────┬──────┘ └───────┬─────┘ +/// │ │ │ │ | +/// ┌───┴───┐ ┌───┴───┐ │ │ | +/// │ Mul | │ Mul │ | │ | +/// └───┬───┘ └───┬───┘ │ │ │ +/// │ │ │ │ │ +/// | │ │ │ │ +/// ┌───┴────────────┴────────────┴─────────────┴─┐ | +/// │ ScaledDotProductAttention │──────────────────┘ +/// └────────────────────┬────────────────────────┘ +/// │ +/// │ +/// ┌────┴────┐ +/// │ Output │ +/// └─────────┘ +/// After: +/// ┌───────┐ ┌───────┐ ┌───────┐ ┌─────────────┐ ┌───────┐ +/// │ Q │ │ K │ │ V │ │AttentionMask│ │ Scale | +/// └───┬───┘ └───┬───┘ └───┬───┘ └──────┬──────┘ └───┬───┘ +/// │ │ │ │ | +/// │ │ │ │ | +/// | │ │ │ | +/// ┌───┴────────────┴────────────┴─────────────┴─┐ | +/// │ ScaledDotProductAttention │───────────┘ +/// └────────────────────┬────────────────────────┘ +/// │ +/// │ +/// ┌────┴────┐ +/// │ Output │ +/// └─────────┘ +/// Multiply ops for Q and K are eliminated in the following cases: +/// 1. Q_scale and K_scale are constant +/// 2. 
Q_scale * SDPA_Scale == 1 or K_scale * SDPA_Scale == 1 +class TRANSFORMATIONS_API SDPAScaleFusion : public ov::pass::MatcherPass { +public: + OPENVINO_MATCHER_PASS_RTTI("SDPAScaleFusion", "0"); + SDPAScaleFusion(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 185ae84ec83642..23fbf882024bdc 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -65,6 +65,7 @@ #include "transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp" #include "transformations/common_optimizations/reshape_sequence_fusion.hpp" #include "transformations/common_optimizations/ric_fusion.hpp" +#include "transformations/common_optimizations/sdpa_fusion.hpp" #include "transformations/common_optimizations/select_with_one_value_condition.hpp" #include "transformations/common_optimizations/sequence_fusion.hpp" #include "transformations/common_optimizations/shared_ops_optimization.hpp" @@ -229,6 +230,7 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr ADD_MATCHER(common_fusions, ConvertTensorIteratorToSequence) ADD_MATCHER(common_fusions, SplitConcatPairToInterpolateFusion, m_use_shapes) ADD_MATCHER(common_fusions, ConvolutionToGroupConvolutionFusion) + ADD_MATCHER(common_fusions, SDPAFusion) if (m_use_shapes) { ADD_MATCHER(common_fusions, NearestNeighborUpsamplingFusion) } diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp new file mode 100644 index 00000000000000..fc581580f70001 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_fusion.cpp @@ -0,0 +1,127 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/sdpa_fusion.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/unsqueeze.hpp" +#include "openvino/pass/pattern/op/optional.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/gen_pattern.hpp" + +namespace ov { +namespace pass { + +SDPAFusion::SDPAFusion() { + using namespace ov::pass::pattern; + using namespace ov::gen_pattern; + + auto q = makePattern(ov::Rank(4)); + auto k = makePattern(ov::Rank(4)); + auto v = makePattern(ov::Rank(4)); + auto mask = makePattern(); + + auto k_transpose_order = pattern::wrap_type([](const Output& node) { + auto axis_order = + std::dynamic_pointer_cast(node.get_node_shared_ptr())->cast_vector(); + return axis_order == std::vector{0, 1, 3, 2}; + }); + + auto k_t = pattern::wrap_type({k, k_transpose_order}); + auto qk_nn = makePattern({q, k_t}, {{"transpose_a", false}, {"transpose_b", false}}); + auto qk_nt = makePattern({q, k}, {{"transpose_a", false}, {"transpose_b", true}}); + auto qk = qk_nt | qk_nn; + auto optional_add_mask = optional({qk, mask}); + auto softmax = 
makePattern({optional_add_mask}, {{"axis", "-1"}}); + auto qkv = makePattern({softmax, v}, {{"transpose_a", false}, {"transpose_b", false}}); + + auto valid_qk_shapes = [](const std::shared_ptr& qk_matmul) { + auto q_pshape = qk_matmul->get_input_partial_shape(0); + auto k_pshape = qk_matmul->get_input_partial_shape(1); + + const size_t q_head_size_idx = 3; + const size_t k_head_size_idx = qk_matmul->get_transpose_b() ? 3 : 2; + + return q_pshape.size() == 4 && k_pshape.size() == 4 && q_pshape[q_head_size_idx].is_static() && + k_pshape[k_head_size_idx].is_static() && + q_pshape[q_head_size_idx].get_length() == k_pshape[k_head_size_idx].get_length(); + }; + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (transformation_callback(m.get_match_root())) { + return false; + } + + auto q_node = pattern_map.at(q); + auto k_node = pattern_map.at(k); + auto v_node = pattern_map.at(v); + + if (!valid_qk_shapes(ov::as_type_ptr(pattern_map.at(qk).get_node_shared_ptr()))) { + return false; + } + + if (pattern_map.at(qk).get_target_inputs().size() > 1 || + pattern_map.at(softmax).get_target_inputs().size() > 1) { + return false; + } + if (pattern_map.count(optional_add_mask) && (pattern_map.at(optional_add_mask).get_target_inputs().size() > 1 || + pattern_map.at(mask).get_partial_shape().size() > 4)) { + return false; + } + + Output mask_value; + Output mask_input; + if (pattern_map.find(optional_add_mask) != pattern_map.end()) { + mask_value = pattern_map.at(mask); + } else { + mask_value = ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{0}); + } + + if (mask_value.get_partial_shape().size() > 4) { + return false; + } + + if (mask_value.get_partial_shape().rank() == 0 || mask_value.get_partial_shape().rank() == 4) { + mask_input = mask_value; + } else { + size_t rank_diff = q_node.get_partial_shape().size() - mask_value.get_partial_shape().size(); + std::vector axes(rank_diff); + std::iota(axes.begin(), axes.end(), 0); + mask_input = std::make_shared( + mask_value, + ov::op::v0::Constant::create(ov::element::i64, ov::Shape{rank_diff}, axes)); + } + + std::shared_ptr scale_node = + ov::op::v0::Constant::create(q_node.get_element_type(), ov::Shape{}, std::vector{1.0f}); + + std::shared_ptr sdpa = std::make_shared(q_node, + k_node, + v_node, + mask_input, + scale_node, + false); + + sdpa->set_friendly_name(m.get_match_root()->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), sdpa); + ov::replace_node(m.get_match_root(), sdpa); + + return true; + }; + + auto m = std::make_shared(qkv, "SDPAFusion"); + this->register_matcher(m, callback); +} + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp new file mode 100644 index 00000000000000..3d750fe38a868e --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/sdpa_scale_fusion.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/sdpa_scale_fusion.hpp" + +#include + +#include "openvino/core/node.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" +#include "openvino/pass/pattern/op/optional.hpp" 
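// Sketch of the scale-folding arithmetic implemented by the matcher callback
// below, assuming the Q/K multipliers are scalar constants:
//
//   new_scale = prev_scale * q_scale * k_scale
//
// where prev_scale is the SDPA op's explicit scale input when one exists and
// 1 / sqrt(head_size) otherwise. For example, in SDPAScaleFusionTest1 below
// (head_size = 32, q_scale = k_scale = 8.0f, no explicit SDPA scale), the
// fused scale becomes 8 * 8 / sqrt(32) = 64.0f / std::sqrt(32.0f).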
+#include "openvino/pass/pattern/op/pattern.hpp" +#include "transformations/utils/gen_pattern.hpp" + +namespace ov { +namespace pass { + +SDPAScaleFusion::SDPAScaleFusion() { + using namespace ov::pass::pattern; + using namespace ov::gen_pattern; + + auto q = makePattern(ov::Rank(4)); + auto k = makePattern(ov::Rank(4)); + auto v = makePattern(ov::Rank(4)); + auto mask = makePattern(); + auto sdpa_scale = makeConst({}); + auto scale_q = makePattern("[]") | makePattern("[1]"); + auto scale_k = makePattern("[]") | makePattern("[1]"); + + auto scaled_q = optional({q, scale_q}); + auto scaled_k = optional({k, scale_k}); + auto sdpa_mask_scale = + makePattern({scaled_q, scaled_k, v, mask, sdpa_scale}, + {{"causal", false}}); + auto sdpa_mask = + makePattern({scaled_q, scaled_k, v, mask}, {{"causal", false}}); + auto sdpa_simple = + makePattern({scaled_q, scaled_k, v}, {{"causal", false}}); + auto sdpa = sdpa_simple | sdpa_mask | sdpa_mask_scale; + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + if (transformation_callback(m.get_match_root())) { + return false; + } + + auto sdpa = m.get_match_root(); + + const bool has_q_scale = pattern_map.count(scaled_q); + const bool has_k_scale = pattern_map.count(scaled_k); + + // Nothing to do + if (!has_q_scale && !has_k_scale) + return false; + + auto prev_scale_value = 1.0f; + auto scale_q_value = 1.0f; + auto scale_k_value = 1.0f; + auto scale_et = sdpa->get_output_element_type(0); + + Output q_input = sdpa->get_input_source_output(0); + Output k_input = sdpa->get_input_source_output(1); + + std::shared_ptr scale_q_node = nullptr; + std::shared_ptr scale_k_node = nullptr; + + if (pattern_map.find(sdpa_scale) != pattern_map.end()) { + auto prev_scale_node = + ov::as_type_ptr(pattern_map.at(sdpa_scale).get_node_shared_ptr()); + prev_scale_value = prev_scale_node->cast_vector()[0]; + scale_et = prev_scale_node->get_output_element_type(0); + } else { + auto head_size = q_input.get_partial_shape()[3]; + if (head_size.is_dynamic()) + return false; + + prev_scale_value = 1.0f / std::sqrt(static_cast(head_size.get_length())); + } + + // Extract scalar scale values for Q and K if those are constant and set new inputs for SDPA + if (has_q_scale) { + scale_q_node = pattern_map.at(scale_q).get_node_shared_ptr(); + if (ov::is_type(scale_q_node)) { + scale_q_value = ov::as_type_ptr(scale_q_node)->cast_vector()[0]; + q_input = pattern_map.at(q); + } + } + if (has_k_scale) { + scale_k_node = pattern_map.at(scale_k).get_node_shared_ptr(); + if (ov::is_type(scale_k_node)) { + scale_k_value = ov::as_type_ptr(scale_k_node)->cast_vector()[0]; + k_input = pattern_map.at(k); + } + } + + Output new_scale_node; + auto new_scale_val = prev_scale_value * scale_q_value * scale_k_value; + + // If new scale is 1 and we have non-constant scale node for either Q or K, then we can make it a scale of SDPA + if (new_scale_val == 1.0f) { + if (has_q_scale && !ov::is_type(scale_q_node)) { + new_scale_node = pattern_map.at(scale_q); + q_input = pattern_map.at(q); + } else if (has_k_scale && !ov::is_type(scale_k_node)) { + new_scale_node = pattern_map.at(scale_k); + k_input = pattern_map.at(k); + } else { + new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val}); + } + } else { + new_scale_node = ov::op::v0::Constant::create(scale_et, ov::Shape{}, std::vector{new_scale_val}); + } + + OutputVector new_inputs = {q_input, k_input, pattern_map.at(v)}; + if 
(pattern_map.find(mask) != pattern_map.end()) { + new_inputs.push_back(pattern_map.at(mask)); + } else { + new_inputs.push_back( + ov::op::v0::Constant::create(new_scale_node.get_element_type(), ov::Shape{}, std::vector{0.0f})); + } + + new_inputs.push_back(new_scale_node); + + auto new_sdpa = sdpa->clone_with_new_inputs(new_inputs); + new_sdpa->set_friendly_name(sdpa->get_friendly_name()); + ov::copy_runtime_info(sdpa, new_sdpa); + ov::replace_node(sdpa, new_sdpa); + + return true; + }; + + auto m = std::make_shared(sdpa, "SDPAScaleFusion"); + this->register_matcher(m, callback); +} + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp new file mode 100644 index 00000000000000..52c10ba5967bd8 --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/sdpa_fusion_test.cpp @@ -0,0 +1,234 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/op/transpose.hpp" + +using namespace testing; +using namespace ov::pass; +using namespace ov; + +TEST_F(TransformationTestsF, SDPAFusionTest1) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest2) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f}); + const 
auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest3) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto casual = false; + { + const auto key_t = + std::make_shared(key, + op::v0::Constant::create(element::i64, Shape{4}, {0, 1, 3, 2})); + const auto qk = std::make_shared(query, key_t, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto mask_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{0.0f}); + const auto sdpa = std::make_shared(query, + key, + value, + mask_const, + scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest4) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, 32, -1}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + { + const auto qk = std::make_shared(query, key, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + model_ref = model->clone(); + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest5) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + const PartialShape attention_mask_shape{1, 32, -1, -1}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto mask = std::make_shared(element::f16, attention_mask_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto mask_add = std::make_shared(qk, mask); + const auto softmax = std::make_shared(mask_add, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, 
std::vector{1.0f}); + const auto sdpa = + std::make_shared(query, key, value, mask, scale_const, casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest6) { + const PartialShape query_shape{1, 32, 10, 32}; + const PartialShape key_shape{1, 32, 10, 32}; + const PartialShape value_shape{1, 32, 10, 32}; + const PartialShape attention_mask_shape{1, 1, 10, 10}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + const auto mask = std::make_shared(element::f16, attention_mask_shape); + const auto casual = false; + { + const auto qk = std::make_shared(query, key, false, true); + const auto mask_add = std::make_shared(qk, mask); + const auto softmax = std::make_shared(mask_add, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value, mask}); + manager.register_pass(); + } + + { + const auto scale_const = ov::op::v0::Constant::create(element::f16, ov::Shape{}, std::vector{1.0f}); + const auto sdpa = + std::make_shared(query, key, value, mask, scale_const, casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value, mask}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAFusionTest7) { + const PartialShape query_shape{1, 8, -1, 32}; + const PartialShape key_shape{-1, 1, 8, 32}; + const PartialShape value_shape{1, 8, -1, 32}; + + const auto query = std::make_shared(element::f16, query_shape); + const auto key = std::make_shared(element::f16, key_shape); + const auto value = std::make_shared(element::f16, value_shape); + { + const auto key_t = + std::make_shared(key, + op::v0::Constant::create(element::i64, Shape{4}, {1, 2, 3, 0})); + const auto qk = std::make_shared(query, key_t, false, false); + const auto softmax = std::make_shared(qk, -1); + const auto qkv = std::make_shared(softmax, value, false, false); + + model = std::make_shared(NodeVector{qkv}, ParameterVector{query, key, value}); + manager.register_pass(); + } +} diff --git a/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp new file mode 100644 index 00000000000000..f922f030a9c43b --- /dev/null +++ b/src/common/transformations/tests/common_optimizations/sdpa_scale_fusion_test.cpp @@ -0,0 +1,228 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/scaled_dot_product_attention.hpp" + +using namespace testing; +using namespace ov::pass; +using namespace ov; + +TEST_F(TransformationTestsF, SDPAScaleFusionTest1) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = 
std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_const); + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = + std::make_shared(q_scaled, k_scaled, v_scaled, casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto new_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto new_scale_const = + ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{64.0f / std::sqrt(32.0f)}); + const auto sdpa = std::make_shared(query, + key, + v_scaled, + new_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest2) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{2.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto v_scaled = std::make_shared(value, scale_const); + const auto casual = false; + { + const auto q_scaled = std::make_shared(query, scale_const); + const auto k_scaled = std::make_shared(key, scale_const); + const auto sdpa = std::make_shared(q_scaled, + k_scaled, + v_scaled, + sdpa_mask_const, + sdpa_scale_const, + casual); + + model = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + manager.register_pass(); + } + + { + const auto new_scale_const = + ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{128.0f}); + const auto sdpa = std::make_shared(query, + key, + v_scaled, + sdpa_mask_const, + new_scale_const, + casual); + model_ref = std::make_shared(NodeVector{sdpa}, ParameterVector{query, key, value}); + } + + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, SDPAScaleFusionTest3) { + const PartialShape query_shape{1, 32, -1, 32}; + const PartialShape key_shape{1, 32, -1, 32}; + const PartialShape value_shape{1, 32, -1, 32}; + + const auto query = std::make_shared(element::f32, query_shape); + const auto key = std::make_shared(element::f32, key_shape); + const auto value = std::make_shared(element::f32, value_shape); + const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{0.0f}); + const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{2.0f}); + const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector{8.0f}); + const auto 
+    const auto v_scaled = std::make_shared<ov::op::v1::Multiply>(value, scale_const);
+    const auto casual = false;
+    {
+        const auto q_scaled = std::make_shared<ov::op::v1::Multiply>(query, scale_const);
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   sdpa_scale_const,
+                                                                                   casual);
+
+        model = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value});
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
+    }
+
+    {
+        const auto new_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{16.0f});
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(query,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   new_scale_const,
+                                                                                   casual);
+        model_ref = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value});
+    }
+
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+TEST_F(TransformationTestsF, SDPAScaleFusionTest4) {
+    const PartialShape query_shape{1, 32, -1, 32};
+    const PartialShape key_shape{1, 32, -1, 32};
+    const PartialShape value_shape{1, 32, -1, 32};
+
+    const auto query = std::make_shared<ov::op::v0::Parameter>(element::f32, query_shape);
+    const auto key = std::make_shared<ov::op::v0::Parameter>(element::f32, key_shape);
+    const auto value = std::make_shared<ov::op::v0::Parameter>(element::f32, value_shape);
+    const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{0.0f});
+    const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{2.0f});
+    const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{8.0f});
+    const auto scale_dyn = std::make_shared<ov::op::v0::Parameter>(element::f32, ov::Shape{});
+    const auto v_scaled = std::make_shared<ov::op::v1::Multiply>(value, scale_const);
+    const auto casual = false;
+    const auto q_scaled = std::make_shared<ov::op::v1::Multiply>(query, scale_dyn);
+    {
+        const auto k_scaled = std::make_shared<ov::op::v1::Multiply>(key, scale_const);
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   k_scaled,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   sdpa_scale_const,
+                                                                                   casual);
+
+        model = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
+    }
+
+    {
+        const auto new_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{16.0f});
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   new_scale_const,
+                                                                                   casual);
+        model_ref = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+    }
+
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+TEST_F(TransformationTestsF, SDPAScaleFusionTest5) {
+    const PartialShape query_shape{1, 32, -1, 32};
+    const PartialShape key_shape{1, 32, -1, 32};
+    const PartialShape value_shape{1, 32, -1, 32};
+
+    const auto query = std::make_shared<ov::op::v0::Parameter>(element::f32, query_shape);
+    const auto key = std::make_shared<ov::op::v0::Parameter>(element::f32, key_shape);
+    const auto value = std::make_shared<ov::op::v0::Parameter>(element::f32, value_shape);
+    const auto sdpa_mask_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{0.0f});
+    const auto sdpa_scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{1.0f});
+    const auto scale_const = ov::op::v0::Constant::create(element::f32, ov::Shape{}, std::vector<float>{1.0f});
+    const auto scale_dyn = std::make_shared<ov::op::v0::Parameter>(element::f32, ov::Shape{});
+    const auto v_scaled = std::make_shared<ov::op::v1::Multiply>(value, scale_const);
+    const auto casual = false;
+    {
+        const auto q_scaled = std::make_shared<ov::op::v1::Multiply>(query, scale_dyn);
+        const auto k_scaled = std::make_shared<ov::op::v1::Multiply>(key, scale_const);
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(q_scaled,
+                                                                                   k_scaled,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   sdpa_scale_const,
+                                                                                   casual);
+
+        model = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
+    }
+
+    {
+        const auto sdpa = std::make_shared<ov::op::v13::ScaledDotProductAttention>(query,
+                                                                                   key,
+                                                                                   v_scaled,
+                                                                                   sdpa_mask_const,
+                                                                                   scale_dyn,
+                                                                                   casual);
+        model_ref = std::make_shared<ov::Model>(NodeVector{sdpa}, ParameterVector{query, key, value, scale_dyn});
+    }
+
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index a63377312ecb95..fb9e0925bc89e2 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -37,6 +37,7 @@
 #include "transformations/common_optimizations/nop_elimination.hpp"
 #include "transformations/common_optimizations/reshape_prelu.hpp"
 #include "transformations/common_optimizations/rms_fusion.hpp"
+#include "transformations/common_optimizations/sdpa_fusion.hpp"
 #include "transformations/common_optimizations/transpose_sinking.hpp"
 #include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
 #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
@@ -695,6 +696,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecisions) {
     CPU_DISABLE_PASS_COMMON(manager, ov::pass::MatMulConstTransposesExtraction);
     CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertScatterNDUpdate15ToScatterNDUpdate3);
     CPU_DISABLE_PASS_COMMON(manager, ov::pass::ConvertSliceScatter);
+    CPU_DISABLE_PASS_COMMON(manager, ov::pass::SDPAFusion);
 
     CPU_DISABLE_PASS_X64(manager, ov::pass::HSigmoidDecomposition);
     CPU_DISABLE_PASS_X64(manager, ov::pass::ReduceL1Decomposition);
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 53ab9aa188b7aa..7c7c09adcd182f 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -92,6 +92,7 @@
 #include "transformations/common_optimizations/lstm_cell_fusion.hpp"
 #include "transformations/common_optimizations/move_eltwise_up_data_movement.hpp"
 #include "transformations/common_optimizations/mvn_fusion.hpp"
+#include "transformations/common_optimizations/sdpa_scale_fusion.hpp"
 #include "transformations/common_optimizations/softmax_fusion.hpp"
 #include "transformations/common_optimizations/glu_fusion.hpp"
 #include "transformations/common_optimizations/transpose_sinking.hpp"
@@ -941,6 +942,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         if (!disable_horizontal_fc_fusion)
             manager.register_pass();
 
+        manager.register_pass<ov::pass::SDPAScaleFusion>();
         manager.register_pass();
         auto pass_config = manager.get_pass_config();
         manager.register_pass();