Merge branch 'master' into split-config-in-compile-model
praasz authored Dec 11, 2024
2 parents 511bb88 + 328feb6 commit ca0ba63
Showing 16 changed files with 125 additions and 39 deletions.
12 changes: 5 additions & 7 deletions .github/workflows/send_workflows_to_opentelemetry.yml
@@ -5,11 +5,7 @@ on:
workflows:
- Android ARM64 with vcpkg
- Android x64
- Documentation
- Cleanup PIP caches
- Code snippets
- Code Style
- Code coverage
- Cleanup caches
- Coverity (Ubuntu 20.04, Python 3.11)
- Debian 10 ARM
- Fedora 29 (RHEL 8.4), Python 3.9
@@ -19,10 +15,12 @@ on:
- Linux ARM64 (Ubuntu 20.04, Python 3.11)
- Linux Static CC (Ubuntu 22.04, Python 3.11, Clang)
- Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10)
- Linux (Ubuntu 22.04, Python 3.11, Intel DPC++ Compiler)
- Linux CPU Plugin Snippets with LIBXSMM (Ubuntu 20.04)
- Linux Sanitizers (Ubuntu 20.04, Python 3.9)
- macOS (Python 3.11)
- macOS ARM64 (Python 3.11)
- MO
- Python API Checks
- Manylinux 2014
- Webassembly
- Windows (VS 2019, Python 3.11, Release)
- Windows (VS 2019, Python 3.11, Debug)
29 changes: 29 additions & 0 deletions docs/RELEASE.MD
@@ -0,0 +1,29 @@
# OpenVINO Release Management
The process described below covers how OpenVINO releases are planned, developed, stabilized, validated, and distributed.

## Release Milestones
- Planning
- Execution (development of new features)
- Stabilization (Feature Freeze, Code Freeze milestones)
- Validation
- Distribution

### Planning
This phase takes 2-4 weeks: the backlog is scoped, analyzed, and prioritized, and developers commit to the timelines specified by the release manager.

### Execution (development of new features)
- [OpenVINO Contributing Guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md)
- [Code Contribution Guide](https://docs.openvino.ai/2024/about-openvino/contributing/code-contribution-guide.html)
- [OpenVINO First Good Issue](https://github.com/openvinotoolkit/openvino/issues/17502)

### Stabilization (Feature Freeze, Code Freeze milestones)
- **Feature Freeze**: This milestone ensures that no new features are added to the software after a certain point. This allows the development team to focus on stabilizing and refining the existing features, fixing bugs, and improving performance without the risk of introducing new issues.
- **Code Freeze**: This milestone marks the point where no new code changes are allowed except for critical bug fixes. This helps in ensuring that the final product is stable and reliable, as it minimizes the risk of last-minute changes that could introduce new bugs or instability.

### Release Validation
- Validation is a continuous process whose cadence depends on the testing type: nightly, weekly, or bi-weekly.
- After Code Freeze, the testing team can perform final regression testing to ensure that recent changes have not introduced new bugs and that the software meets the required quality standards.

### Distribution
- OpenVINO has several distribution types: regular releases, Long-Term Support (LTS) releases, pre-releases, and nightly builds. Read more here: [OpenVINO Release Policy](https://docs.openvino.ai/2024/about-openvino/release-notes-openvino/release-policy.html)
- Multiple distribution channels are supported. Explore the options here: [OpenVINO Download](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html)
6 changes: 6 additions & 0 deletions src/bindings/js/node/src/tensor.cpp
@@ -66,6 +66,12 @@ Napi::Object TensorWrap::wrap(Napi::Env env, ov::Tensor tensor) {
}

Napi::Value TensorWrap::get_data(const Napi::CallbackInfo& info) {
Napi::Env env = info.Env();
if (info.Length() > 0) {
reportError(env, "getData() does not accept any arguments.");
return env.Undefined();
}

auto type = _tensor.get_element_type();

switch (type) {
6 changes: 6 additions & 0 deletions src/bindings/js/node/tests/unit/tensor.test.js
@@ -121,6 +121,12 @@ describe('ov.Tensor tests', () => {
assert.deepStrictEqual(tensor.getData(), data);
});

it('getData should throw an error if arguments are provided', () => {
const tensor = new ov.Tensor(ov.element.f32, shape, data);
assert.throws(() => tensor.getData(1), {
message: 'getData() does not accept any arguments.',
});
});
it('test tensor.data setter - different element type throws', () => {
const float64Data = Float64Array.from([1, 2, 3]);
const tensor = new ov.Tensor(ov.element.f32, [1, 3]);
39 changes: 32 additions & 7 deletions src/core/include/openvino/pass/pattern/matcher.hpp
@@ -62,20 +62,45 @@ class OPENVINO_API Matcher {
// Avoid implicit string construction from nullptr.
Matcher(const std::shared_ptr<Node> pattern_node, std::nullptr_t name) = delete;

Matcher() = default;
Matcher(Output<Node>& pattern_node) : m_pattern_node{pattern_node} {}

Matcher(Output<Node>& pattern_node, const std::string& name) : m_pattern_node(pattern_node), m_name{name} {}
Matcher()
: m_match_root{},
m_pattern_node{},
m_pattern_map{},
m_pattern_value_maps{},
m_matched_list{},
m_name{""},
m_strict_mode{false} {}
Matcher(Output<Node>& pattern_node)
: m_match_root{},
m_pattern_node{pattern_node},
m_pattern_map{},
m_pattern_value_maps{},
m_matched_list{},
m_name{""},
m_strict_mode{false} {}

Matcher(Output<Node>& pattern_node, const std::string& name)
: m_match_root{},
m_pattern_node{pattern_node},
m_pattern_map{},
m_pattern_value_maps{},
m_matched_list{},
m_name{name},
m_strict_mode{false} {}

/// \brief Constructs a Matcher object
///
/// \param pattern_node is a pattern sub graph that will be matched against input graphs
/// \param name is a string which is used for logging and disabling a matcher
/// \param strict_mode forces a matcher to consider shapes and element types of nodes
Matcher(const Output<Node>& pattern_node, const std::string& name, bool strict_mode)
: m_pattern_node(pattern_node),
m_name(name),
m_strict_mode(strict_mode) {}
: m_match_root{},
m_pattern_node{pattern_node},
m_pattern_map{},
m_pattern_value_maps{},
m_matched_list{},
m_name{name},
m_strict_mode{strict_mode} {}

// Some matches should start on a node rather than an output. These three constructors
// are a transition until we work out the right way to do that.
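The explicit member-init lists above guarantee that every field, most importantly the bool m_strict_mode, is in a defined state on every construction path; the replaced defaulted/terse constructors did not initialize it explicitly in this header. As a sketch only (hypothetical MatcherSketch type, not the actual class), the same guarantee can usually be had with in-class default member initializers instead of repeating the list in each constructor:

    // Sketch: in-class initializers apply in every constructor that does not
    // override them, so no path leaves m_strict_mode indeterminate.
    class MatcherSketch {
    public:
        MatcherSketch() = default;
        explicit MatcherSketch(int pattern_node) : m_pattern_node{pattern_node} {}

    private:
        int m_pattern_node{0};
        bool m_strict_mode{false};
    };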
13 changes: 10 additions & 3 deletions src/frontends/pytorch/src/op/stft.cpp
@@ -10,6 +10,7 @@
#include "openvino/op/convert_like.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/shape_of.hpp"
#include "openvino/op/sqrt.hpp"
#include "openvino/op/unsqueeze.hpp"
#include "utils.hpp"

@@ -66,8 +67,6 @@ OutputVector translate_stft(const NodeContext& context) {
if (!context.input_is_none(5)) {
normalized = context.const_input<bool>(5);
}
PYTORCH_OP_CONVERSION_CHECK(!normalized,
"aten::stft conversion is currently supported with normalized=False only.");

bool onesided = true;
if (!context.input_is_none(6)) {
@@ -85,7 +84,15 @@
// Perform STFT
constexpr bool transpose_frames = true;
auto stft = context.mark_node(std::make_shared<v15::STFT>(input, window, n_fft, hop_length, transpose_frames));
return {stft};

if (normalized) {
const auto nfft_convert = context.mark_node(std::make_shared<v1::ConvertLike>(n_fft, stft));
const auto divisor = context.mark_node(std::make_shared<v0::Sqrt>(nfft_convert));
const auto norm_stft = context.mark_node(std::make_shared<v1::Divide>(stft, divisor));
return {norm_stft};
} else {
return {stft};
}
};
} // namespace op
} // namespace pytorch
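For reference, the new branch implements PyTorch's normalized=True scaling for torch.stft, which divides the spectrogram by the square root of the FFT length:

    stft_normalized = stft / sqrt(n_fft)

ConvertLike only casts n_fft to the STFT output's element type before the Sqrt and Divide, so the three marked nodes map one-to-one onto this expression.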
@@ -15,7 +15,7 @@ namespace cldnn {
struct resample : public primitive_base<resample> {
CLDNN_DECLARE_PRIMITIVE(resample)

resample() : primitive_base("", {}) {}
resample() : primitive_base("", {}), scales_port(0) {}

using InterpolateOp = ov::op::util::InterpolateBase;

@@ -140,7 +140,7 @@ class ExecutionConfig {

// Note that an RT info property value has lower priority than values set by the user via core.set_property or passed to a compile_model call,
// so this method should be called after all user properties are set, but before the apply_user_properties() call.
void apply_rt_info(const ov::RTMap& rt_info);
void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info);

std::string to_string() const;
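A minimal sketch of the ordering that note prescribes, mirroring the call sequence in the plugin.cpp hunk later in this commit (engine stands in for however the caller reaches the device; see that hunk for the exact spelling):

    ExecutionConfig config = m_configs_map.at(device_id);
    config.set_user_property(orig_config);                   // 1. user properties first
    if (model->has_rt_info("runtime_options"))               // 2. then model RT info, now device-aware
        config.apply_rt_info(engine.get_device_info(), model->get_rt_info<ov::AnyMap>("runtime_options"));
    config.apply_user_properties(engine.get_device_info());  // 3. resolve user properties last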

@@ -171,6 +171,10 @@ void prepare_primitive_fusing::fuse_swiglu(program &p) {
// Apply only for high performant GPU
if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128)
return;

if (p.get_engine().get_device_info().supports_immad)
return;

// TODO: to support other glu types && other weight data types
auto itr = p.get_processing_order().begin();
std::map<primitive_id, std::vector<std::pair<primitive_id, size_t>>> fusing_history;
@@ -121,7 +121,7 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {

private:
int _zero_point_mask;
dnnl::memory::data_type _wzp_data_type;
dnnl::memory::data_type _wzp_data_type = dnnl::memory::data_type::undef;

protected:
std::unique_ptr<primitive_impl> clone() const override {
@@ -846,9 +846,11 @@ void FullyConnected_bf_tiled::GetUpdateDispatchDataFunc(KernelData& kd) const {
// quantized input is char type
kd.internalBufferSizes.push_back(input_size);
// half type of de_quan_scale and activation sum for each quantized group
OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero.");
kd.internalBufferSizes.push_back((input_size / quantize_grp_size) * 2 * 2);
}

OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero.");
kd.kernels[0].params.workGroups.global = {std::max((input_size / quantize_grp_size), (size_t)1), 1, 1};
kd.kernels[0].params.workGroups.local = {16, 1, 1};
}
@@ -983,6 +985,7 @@ KernelsData FullyConnected_bf_tiled::GetMultiKernelsData(const Params &params,
const auto& fc_params = static_cast<const fully_connected_params&>(params);

size_t quantize_grp_size = get_dynamic_quantize_group_size(fc_params);
OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero.");

bool bProperInput = fc_params.inputs[0].GetLayout() == dl;
if (!bProperInput && !fc_params.inputs[0].PitchesDifferFromLogicalDims()) {
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -190,7 +190,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
ExecutionConfig config = m_configs_map.at(device_id);
config.set_user_property(orig_config);
if (model->has_rt_info("runtime_options"))
config.apply_rt_info(model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_user_properties(context->get_engine().get_device_info());

set_cache_info(model, config);
@@ -281,7 +281,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
ExecutionConfig config = m_configs_map.at(device_id);
config.set_user_property(orig_config);
if (model->has_rt_info("runtime_options"))
config.apply_rt_info(model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_user_properties(ctx->get_engine().get_device_info());

ProgramBuilder prog(ctx->get_engine(), config);
@@ -63,7 +63,7 @@ KVCacheFusionMatcher::KVCacheFusionMatcher() {
return false;

// TODO: Support conversion internally
if (concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
return false;

auto variable = past_node->get_variable();
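The added !concat_node condition is the standard guard after a pointer cast that can fail: std::dynamic_pointer_cast (a plausible source of concat_node here; its assignment sits outside the shown hunk) returns nullptr on a type mismatch, and get_output_element_type(0) would otherwise be invoked through a null pointer. A self-contained sketch of the pattern, with hypothetical types:

    #include <memory>

    struct Base { virtual ~Base() = default; };
    struct Derived : Base { int value = 0; };

    int read_value(const std::shared_ptr<Base>& node) {
        // dynamic_pointer_cast yields nullptr when node is not a Derived,
        // so the result must be null-checked before any member access.
        auto derived = std::dynamic_pointer_cast<Derived>(node);
        return derived ? derived->value : -1;
    }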
8 changes: 5 additions & 3 deletions src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -262,10 +262,12 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
user_properties.clear();
}

void ExecutionConfig::apply_rt_info(const ov::RTMap& rt_info) {
apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) {
if (!info.supports_immad) {
apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
}
apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
}

std::string ExecutionConfig::to_string() const {
12 changes: 10 additions & 2 deletions src/plugins/intel_gpu/tests/functional/behavior/properties.cpp
@@ -2,7 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
//

#include <algorithm>
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/intel_gpu/properties.hpp"
#include "base/ov_behavior_test_utils.hpp"
#include "openvino/runtime/core.hpp"
#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
@@ -43,11 +45,17 @@ TEST_F(TestPropertiesGPU, RTInfoPropertiesWithDefault) {
model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());

OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU));
OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision));
OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size));
ASSERT_EQ(size.as<uint64_t>(), 0);

// GPU with a systolic array does not support some of the rt_info properties
auto capabilities = core.get_property(ov::test::utils::DEVICE_GPU, ov::device::capabilities);
if (find(capabilities.cbegin(), capabilities.cend(), ov::intel_gpu::capability::HW_MATMUL) != capabilities.cend())
return;

OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision));
OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor));
ASSERT_EQ(type.as<ov::element::Type>(), ov::element::f16);
ASSERT_EQ(size.as<uint64_t>(), 0);
ASSERT_EQ(scale.as<float>(), 8.0f);
}

20 changes: 9 additions & 11 deletions tests/layer_tests/pytorch_tests/test_stft.py
@@ -24,16 +24,17 @@ def _prepare_input(self, win_length, signal_shape, rand_data=False, out_dtype="f

return (signal, window.astype(out_dtype))

def create_model(self, n_fft, hop_length, win_length):
def create_model(self, n_fft, hop_length, win_length, normalized):
import torch

class aten_stft(torch.nn.Module):

def __init__(self, n_fft, hop_length, win_length):
def __init__(self, n_fft, hop_length, win_length, normalized):
super(aten_stft, self).__init__()
self.n_fft = n_fft
self.hop_length = hop_length
self.win_length = win_length
self.normalized = normalized

def forward(self, x, window):
return torch.stft(
@@ -44,14 +45,14 @@ def forward(self, x, window):
window=window,
center=False,
pad_mode="reflect",
normalized=False,
normalized=self.normalized,
onesided=True,
return_complex=False,
)

ref_net = None

return aten_stft(n_fft, hop_length, win_length), ref_net, "aten::stft"
return aten_stft(n_fft, hop_length, win_length, normalized), ref_net, "aten::stft"

@pytest.mark.nightly
@pytest.mark.precommit
@@ -64,10 +65,11 @@
[24, 32, 20],
[128, 128, 128],
])
def test_stft(self, n_fft, hop_length, window_size, signal_shape, ie_device, precision, ir_version, trace_model):
@pytest.mark.parametrize(("normalized"), [True, False])
def test_stft(self, n_fft, hop_length, window_size, signal_shape, normalized, ie_device, precision, ir_version, trace_model):
if ie_device == "GPU":
pytest.xfail(reason="STFT op is not supported on GPU yet")
self._test(*self.create_model(n_fft, hop_length, window_size), ie_device, precision,
self._test(*self.create_model(n_fft, hop_length, window_size, normalized), ie_device, precision,
ir_version, kwargs_to_prepare_input={"win_length": window_size, "signal_shape": signal_shape}, trace_model=trace_model)


@@ -125,8 +127,8 @@ def forward(self, x):
[16, None, 16, False, "reflect", False, True, False], # hop_length None
[16, None, None, False, "reflect", False, True, False], # hop & win length None
[16, 4, None, False, "reflect", False, True, False], # win_length None
# Unsupported cases:
[16, 4, 16, False, "reflect", True, True, False], # normalized True
# Unsupported cases:
[16, 4, 16, False, "reflect", False, False, False], # onesided False
[16, 4, 16, False, "reflect", False, True, True], # reutrn_complex True
])
@@ -138,10 +140,6 @@ def test_stft_not_supported_attrs(self, n_fft, hop_length, win_length, center, p
pytest.xfail(
reason="torch stft uses list() for `center` subgrpah before aten::stft, that leads to error: No conversion rule found for operations: aten::list")

if normalized is True:
pytest.xfail(
reason="aten::stft conversion is currently supported with normalized=False only")

if onesided is False:
pytest.xfail(
reason="aten::stft conversion is currently supported with onesided=True only")