Commit
Merge branch 'master' into nashez/napi_tensor_continuous
vishniakov-nikolai authored Dec 30, 2024
2 parents 1ac87a6 + 31f6b3d commit be34fed
Showing 20 changed files with 615 additions and 19 deletions.
@@ -63,19 +63,19 @@ the model precision and the ratio of P-cores and E-cores.

Then the default settings for low-level performance properties on Windows and Linux are as follows:

+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+
| Property | Windows | Linux |
+======================================+=======================================================================+=======================================================================+
| ``ov::num_streams`` | 1 | 1 |
+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+
| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one numa node | is equal to the number of P-cores or P-cores+E-cores on one numa node |
+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+
| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint <core_type_latency>` | :ref:`Core Type Table of Latency Hint <core_type_latency>` |
+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+
| ``ov::hint::enable_hyper_threading`` | No | No |
+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+
| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together |
+--------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------------------------------+
+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+
| Property | Windows | Linux |
+======================================+====================================================================+====================================================================+
| ``ov::num_streams`` | 1 | 1 |
+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+
| ``ov::inference_num_threads`` | is equal to the number of P-cores or P-cores+E-cores on one socket | is equal to the number of P-cores or P-cores+E-cores on one socket |
+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+
| ``ov::hint::scheduling_core_type`` | :ref:`Core Type Table of Latency Hint <core_type_latency>` | :ref:`Core Type Table of Latency Hint <core_type_latency>` |
+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+
| ``ov::hint::enable_hyper_threading`` | No | No |
+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+
| ``ov::hint::enable_cpu_pinning`` | No / Not Supported | Yes except using P-cores and E-cores together |
+--------------------------------------+--------------------------------------------------------------------+--------------------------------------------------------------------+

.. note::

@@ -96,7 +96,7 @@ Then the default settings for low-level performance properties on Windows and Linux
Starting from 5th Gen Intel Xeon Processors, new microarchitecture enabled new sub-NUMA clusters
feature. A sub-NUMA cluster (SNC) can create two or more localization domains (numa nodes)
within a socket by BIOS configuration.
By default OpenVINO with latency hint uses single NUMA node for inference. Although such
By default OpenVINO with latency hint uses single socket for inference. Although such
behavior allows achieving the best performance for most models, there might be corner
cases that require manual tuning of the ``ov::num_streams`` and ``ov::hint::enable_hyper_threading`` parameters.
Please find more detail about `Sub-NUMA Clustering <https://www.intel.com/content/www/us/en/developer/articles/technical/xeon-processor-scalable-family-technical-overview.html>`__
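For such corner cases, the properties from the table above can be set explicitly when compiling a model. A minimal configuration sketch, assuming the OpenVINO 2.0 C++ API; the model path and the thread count of 8 are illustrative placeholders, not recommendations:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path

    // Override the latency-hint defaults listed in the table above.
    auto compiled = core.compile_model(model, "CPU",
        ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
        ov::num_streams(1),
        ov::inference_num_threads(8),  // placeholder: tune to the P-core count
        ov::hint::enable_hyper_threading(false));
    return 0;
}
```

Explicitly set properties take precedence over the hint-derived defaults, so only the ones being tuned need to be passed.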
@@ -0,0 +1,32 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/matcher_pass.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace pass {

class TRANSFORMATIONS_API FakeConvertDecomposition;

} // namespace pass
} // namespace ov

/**
* @ingroup ov_transformation_common_api
* @brief FakeConvertDecomposition transformation decomposes FakeConvert layer.
* f8: f8e4m3, f8e5m2
* downconvert: f32->f8, f16->f8, bf16->f8
* upconvert: f8->f32, f8->f16, f8->bf16
* output = (upconvert(downconvert(input * scale - shift)) + shift) / scale
*
*/

class ov::pass::FakeConvertDecomposition : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("FakeConvertDecomposition");
FakeConvertDecomposition();
};
@@ -0,0 +1,76 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/op_conversions/fake_convert_decomposition.hpp"

#include "itt.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/fake_convert.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/subtract.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
MATCHER_SCOPE(FakeConvertDecomposition);
auto data = pattern::any_input();

auto fake_convert = ov::pass::pattern::wrap_type<ov::op::v13::FakeConvert>();

matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
auto& pattern_to_output = m.get_pattern_value_map();
const auto fake_convert_node =
ov::as_type_ptr<ov::op::v13::FakeConvert>(pattern_to_output.at(fake_convert).get_node_shared_ptr());

if (fake_convert_node == nullptr || transformation_callback(fake_convert_node)) {
return false;
}

Output<Node> data{fake_convert_node->input_value(0)};
const Output<Node> input_scale{fake_convert_node->input_value(1)};
auto input_type = data.get_element_type();

ov::pass::NodeRegistry decomp_ops;
if (input_type != input_scale.get_element_type()) {
input_type = input_scale.get_element_type();
data = std::make_shared<ov::op::v0::Convert>(data, input_type);
data = decomp_ops.add(data.get_node_shared_ptr());
}

std::shared_ptr<Node> result;
const auto scale = decomp_ops.make<ov::op::v1::Multiply>(data, input_scale);
if (fake_convert_node->get_input_size() == 2) {
const auto downconvert =
decomp_ops.make<ov::op::v0::Convert>(scale, fake_convert_node->get_destination_element_type());
const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

result = decomp_ops.make<ov::op::v1::Divide>(upconvert, input_scale);
} else {
const Output<Node> input_shift{fake_convert_node->input_value(2)};
const auto shift = decomp_ops.make<ov::op::v1::Subtract>(scale, input_shift);

const auto downconvert =
decomp_ops.make<ov::op::v0::Convert>(shift, fake_convert_node->get_destination_element_type());
const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

const auto deshift = decomp_ops.make<ov::op::v1::Add>(upconvert, input_shift);
result = decomp_ops.make<ov::op::v1::Divide>(deshift, input_scale);
}

if (result->get_output_element_type(0) != fake_convert_node->get_output_element_type(0)) {
result = decomp_ops.make<ov::op::v0::Convert>(result, fake_convert_node->get_output_element_type(0));
}

result->set_friendly_name(m.get_match_root()->get_friendly_name());
ov::copy_runtime_info(fake_convert_node, decomp_ops.get());
ov::replace_node(m.get_match_root(), result);
return true;
};

auto m = std::make_shared<ov::pass::pattern::Matcher>(fake_convert, matcher_name);
register_matcher(m, callback);
}
@@ -0,0 +1,149 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformations/op_conversions/fake_convert_decomposition.hpp"

#include <gtest/gtest.h>

#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/ov_test_utils.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset13.hpp"

using namespace ov;

using FakeConvertDecompositionParams = std::tuple<Shape, // data shape
Shape, // scale shape
Shape, // shift shape
element::Type_t, // input precision
element::Type_t, // destination precision
bool>; // default shift

class FakeConvertDecompositionTest : public ov::test::TestsCommon,
public ::testing::WithParamInterface<FakeConvertDecompositionParams> {
public:
static std::string getTestCaseName(::testing::TestParamInfo<FakeConvertDecompositionParams> obj) {
FakeConvertDecompositionParams params = obj.param;

Shape data_shape, scale_shape, shift_shape;
element::Type_t data_prec, dst_prec;
bool default_shift;
std::tie(data_shape, scale_shape, shift_shape, data_prec, dst_prec, default_shift) = params;

std::ostringstream result;
result << "dataShape=" << ov::test::utils::vec2str(data_shape) << "_";
result << "scaleShape=" << ov::test::utils::vec2str(scale_shape) << "_";
result << "shiftShape=" << ov::test::utils::vec2str(shift_shape) << "_";
result << "dataPrecision=" << element::Type(data_prec) << "_";
result << "destinationPrecision=" << element::Type(dst_prec) << "_";
if (default_shift)
result << "defaultShift=true";
else
result << "defaultShift=false";
return result.str();
}
};

TEST_P(FakeConvertDecompositionTest, CompareFunctions) {
FakeConvertDecompositionParams params = this->GetParam();

Shape data_shape, scale_shape, shift_shape;
element::Type_t data_prec, dst_prec;
bool default_shift;
std::tie(data_shape, scale_shape, shift_shape, data_prec, dst_prec, default_shift) = params;

std::shared_ptr<ov::Model> model(nullptr);
{
const auto data = std::make_shared<opset1::Parameter>(data_prec, PartialShape(data_shape));
const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape);
const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape);

const auto fake_convert = default_shift ? std::make_shared<opset13::FakeConvert>(data, scale, dst_prec)
: std::make_shared<opset13::FakeConvert>(data, scale, shift, dst_prec);
model = std::make_shared<ov::Model>(NodeVector{fake_convert}, ParameterVector{data});

pass::Manager manager;
manager.register_pass<ov::pass::InitNodeInfo>();
manager.register_pass<ov::pass::FakeConvertDecomposition>();
manager.run_passes(model);

OV_ASSERT_NO_THROW(check_rt_info(model));
}

std::shared_ptr<ov::Model> model_ref(nullptr);
{
const auto input_data = std::make_shared<opset1::Parameter>(data_prec, PartialShape(data_shape));
const auto input_scale = std::make_shared<opset1::Constant>(data_prec, scale_shape);
const auto input_shift = std::make_shared<opset1::Constant>(data_prec, shift_shape);
ParameterVector params;
params.push_back(input_data);
std::shared_ptr<Node> data = input_data;

std::shared_ptr<Node> result;
const auto scale = std::make_shared<ov::op::v1::Multiply>(data, input_scale);
if (default_shift) {
const auto downconvert = std::make_shared<ov::op::v0::Convert>(scale, dst_prec);
const auto upconvert = std::make_shared<ov::op::v0::Convert>(downconvert, data_prec);

result = std::make_shared<ov::op::v1::Divide>(upconvert, input_scale);
} else {
const auto shift = std::make_shared<ov::op::v1::Subtract>(scale, input_shift);

const auto downconvert = std::make_shared<ov::op::v0::Convert>(shift, dst_prec);
const auto upconvert = std::make_shared<ov::op::v0::Convert>(downconvert, data_prec);

const auto deshift = std::make_shared<ov::op::v1::Add>(upconvert, input_shift);
result = std::make_shared<ov::op::v1::Divide>(deshift, input_scale);
}

model_ref = std::make_shared<ov::Model>(NodeVector{result}, params);
}

const auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

const std::vector<element::Type_t> data_precisions = {element::Type_t::f32,
element::Type_t::f16,
element::Type_t::bf16};

const std::vector<element::Type_t> destination_precisions = {element::Type_t::f8e4m3, element::Type_t::f8e5m2};

const std::vector<bool> default_shift = {true, false};

const auto simple_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}),
::testing::Values(Shape{1}),
::testing::Values(Shape{1}),
::testing::ValuesIn(data_precisions),
::testing::ValuesIn(destination_precisions),
::testing::ValuesIn(default_shift));

const auto broadcast_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}),
::testing::Values(Shape{2, 3, 1, 1}),
::testing::Values(Shape{2, 3, 1, 1}),
::testing::ValuesIn(data_precisions),
::testing::ValuesIn(destination_precisions),
::testing::ValuesIn(default_shift));

const auto elementwise_fake_convert_params = ::testing::Combine(::testing::Values(Shape{2, 3, 4, 5}),
::testing::Values(Shape{2, 3, 4, 5}),
::testing::Values(Shape{2, 3, 4, 5}),
::testing::ValuesIn(data_precisions),
::testing::ValuesIn(destination_precisions),
::testing::ValuesIn(default_shift));

INSTANTIATE_TEST_SUITE_P(SimpleFakeConvert_Decomposition,
FakeConvertDecompositionTest,
simple_fake_convert_params,
FakeConvertDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(BroadcastFakeConvert_Decomposition,
FakeConvertDecompositionTest,
broadcast_fake_convert_params,
FakeConvertDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(ElementwiseFakeConvert_Decomposition,
FakeConvertDecompositionTest,
elementwise_fake_convert_params,
FakeConvertDecompositionTest::getTestCaseName);
4 changes: 2 additions & 2 deletions src/frontends/paddle/src/default_opset.hpp
@@ -2,13 +2,13 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/opsets/opset9.hpp"
#include "openvino/opsets/opset14.hpp"

namespace ov {
namespace frontend {
namespace paddle {
namespace op {
namespace default_opset = ov::opset9;
namespace default_opset = ov::opset14;

} // namespace op
} // namespace paddle
23 changes: 23 additions & 0 deletions src/frontends/paddle/src/op/elu.cpp
@@ -0,0 +1,23 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "default_opset.hpp"
#include "openvino/frontend/paddle/node_context.hpp"
#include "openvino/frontend/paddle/visibility.hpp"

namespace ov {
namespace frontend {
namespace paddle {
namespace op {
NamedOutputs elu(const NodeContext& node) {
auto data = node.get_input("X");
auto alpha = node.get_attribute<float>("alpha", 1.0);
const auto& elu_node = std::make_shared<default_opset::Elu>(data, alpha);
return node.default_single_output_mapping({elu_node}, {"Out"});
}

} // namespace op
} // namespace paddle
} // namespace frontend
} // namespace ov
10 changes: 9 additions & 1 deletion src/frontends/paddle/src/op/expand_v2.cpp
@@ -19,8 +19,16 @@ NamedOutputs expand_v2(const NodeContext& node) {
auto inputs = node.get_ng_inputs("expand_shapes_tensor");
ov::NodeVector node_vec;
for (auto& input : inputs) {
if (input.get_partial_shape().rank().get_length() == 0) {
// should unsqueeze the input with non-shape.
auto unsqueeze_scalar = default_opset::Constant::create(ov::element::i32, {}, {0});
input = std::make_shared<default_opset::Unsqueeze>(input, unsqueeze_scalar);
}
PADDLE_OP_CHECK(node,
input.get_partial_shape().rank().get_length() == 1,
"the rank of conv input must == 1");
auto cast = std::make_shared<Convert>(input, element::i32);
node_vec.push_back(cast);
node_vec.emplace_back(cast);
}
shape_expected_node = std::make_shared<Concat>(node_vec, 0);
} else {
