Merge branch 'master' into shape-infer/remove-cpu-custom-shape-infer-factories
praasz authored Jan 17, 2025
2 parents 95c02ac + 33eda4b commit b7a89aa
Showing 423 changed files with 2,047 additions and 1,706 deletions.
2 changes: 1 addition & 1 deletion .github/dockerfiles/docker_tag
@@ -1 +1 @@
pr-28380
pr-28040
10 changes: 7 additions & 3 deletions .github/dockerfiles/ov_build/ubuntu_22_04_x64_cc/Dockerfile
@@ -36,7 +36,11 @@ RUN apt-get update && \
# For Java API
default-jdk \
# Compiler \
clang \
clang-15 \
# Static analyzer
clang-tidy-15 \
# clang-tidy uses clang-format as a dependency
clang-format-15 \
&& \
rm -rf /var/lib/apt/lists/*

@@ -47,8 +51,8 @@ RUN chmod +x /install_build_dependencies.sh && \
rm -rf /var/lib/apt/lists/*

# Set clang as a default compiler
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100 && \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 100
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/clang-15 100 && \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++-15 100

# Install sccache
ARG SCCACHE_VERSION="v0.7.5"
5 changes: 4 additions & 1 deletion .github/workflows/linux_conditional_compilation.yml
@@ -151,14 +151,17 @@ jobs:
# Build
#

- name: CMake configure - CC COLLECT
- name: CMake configure - CC COLLECT with clang-tidy
# clang-tidy static analysis check is enabled as part of collection
# to avoid an additional separate build execution
run: |
cmake \
-G "${{ env.CMAKE_GENERATOR }}" \
-DCMAKE_CXX_STANDARD=20 \
-DBUILD_SHARED_LIBS=OFF \
-DENABLE_TESTS=ON \
-DENABLE_CPPLINT=OFF \
-DENABLE_CLANG_TIDY=ON \
-DENABLE_NCC_STYLE=OFF \
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-DENABLE_PROFILING_ITT=ON \
@@ -305,6 +305,7 @@ include(python_requirements)

include(cpplint/cpplint)
include(clang_format/clang_format)
include(clang_tidy/clang_tidy)
include(ncc_naming_style/ncc_naming_style)

# Restore state
25 changes: 25 additions & 0 deletions cmake/developer_package/clang_tidy/clang_tidy.cmake
@@ -0,0 +1,25 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

if(ENABLE_CLANG_TIDY)
set(CLANG_TIDY_REQUIRED_VERSION 15 CACHE STRING "clang-tidy version to use")
set(CLANG_TIDY_FILENAME clang-tidy-${CLANG_TIDY_REQUIRED_VERSION} clang-tidy)
find_host_program(CLANG_TIDY NAMES ${CLANG_TIDY_FILENAME} PATHS ENV PATH)
if(CLANG_TIDY)
        execute_process(COMMAND ${CLANG_TIDY} --version OUTPUT_VARIABLE CLANG_VERSION)
if(NOT CLANG_VERSION)
message(WARNING "Supported clang-tidy version is ${CLANG_TIDY_REQUIRED_VERSION}!")
set(ENABLE_CLANG_TIDY OFF)
else()
string(REGEX REPLACE "[^0-9]+([0-9]+)\\..*" "\\1" CLANG_TIDY_MAJOR_VERSION ${CLANG_VERSION})
if(NOT CLANG_TIDY_MAJOR_VERSION EQUAL CLANG_TIDY_REQUIRED_VERSION)
message(WARNING "Supported clang-tidy version is ${CLANG_TIDY_REQUIRED_VERSION}! Provided version ${CLANG_TIDY_MAJOR_VERSION}")
set(ENABLE_CLANG_TIDY OFF)
endif()
endif()
else()
message(WARNING "Supported clang-tidy-${CLANG_TIDY_REQUIRED_VERSION} is not found!")
set(ENABLE_CLANG_TIDY OFF)
endif()
endif()
2 changes: 2 additions & 0 deletions cmake/developer_package/features.cmake
@@ -78,6 +78,8 @@ ov_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of fail

ov_option (ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" ${STYLE_CHECKS_DEFAULT})

ov_option (ENABLE_CLANG_TIDY "Enable clang-tidy checks during the build" ${STYLE_CHECKS_DEFAULT})

ov_option (ENABLE_NCC_STYLE "Enable ncc style check" ${STYLE_CHECKS_DEFAULT})

ov_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF)
9 changes: 8 additions & 1 deletion cmake/developer_package/plugins/plugins.cmake
@@ -34,10 +34,11 @@ endif()
# [SKIP_INSTALL]
# [SKIP_REGISTRATION] Skip creation of <device>.xml
# [ADD_CLANG_FORMAT]
# [ADD_CLANG_TIDY]
# )
#
function(ov_add_plugin)
set(options SKIP_INSTALL PSEUDO_DEVICE ADD_CLANG_FORMAT AS_EXTENSION SKIP_REGISTRATION)
set(options SKIP_INSTALL PSEUDO_DEVICE ADD_CLANG_FORMAT ADD_CLANG_TIDY AS_EXTENSION SKIP_REGISTRATION)
set(oneValueArgs NAME DEVICE_NAME VERSION_DEFINES_FOR PSEUDO_PLUGIN_FOR)
set(multiValueArgs DEFAULT_CONFIG SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS)
cmake_parse_arguments(OV_PLUGIN "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -105,6 +106,12 @@ function(ov_add_plugin)
string(CONCAT custom_filter "${custom_filter}" "," "${filter}")
endforeach()

if (OV_PLUGIN_ADD_CLANG_TIDY)
if (ENABLE_CLANG_TIDY)
set_target_properties(${OV_PLUGIN_NAME} PROPERTIES CXX_CLANG_TIDY clang-tidy-${CLANG_TIDY_REQUIRED_VERSION})
endif()
endif()

if (OV_PLUGIN_ADD_CLANG_FORMAT)
ov_add_clang_format_target(${OV_PLUGIN_NAME}_clang FOR_SOURCES ${OV_PLUGIN_SOURCES})
else()
@@ -179,7 +179,7 @@ It is recommended to address model outputs by the index rather than the name.
Support for torch.export
########################

`torch.export <https://pytorch.org/docs/2.2/export.html>`__ is the current way to get a graph
`torch.export <https://pytorch.org/docs/stable/export.html>`__ is the current way to get a graph
representation of a model (since PyTorch 2.1). It produces ``ExportedProgram`` which includes
the graph representation in the FX format. To see why it has an advantage over the TorchScript
representation, refer to `PyTorch documentation <https://pytorch.org/docs/stable/fx.html>`__.
@@ -198,11 +198,6 @@ Here is an example of how to convert a model obtained with ``torch.export``:
exported_model = export(model, (torch.randn(1, 3, 224, 224),))
ov_model = convert_model(exported_model)
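For reference, a more complete, self-contained sketch of the same flow (the ResNet-18 model from ``torchvision`` is used here purely as a hypothetical example; only ``torch.export.export`` and ``openvino.convert_model`` are part of the documented API):

import torch
import torchvision
import openvino as ov
from torch.export import export

# Any torch.nn.Module works here; ResNet-18 is just an illustrative stand-in.
model = torchvision.models.resnet18(weights=None).eval()

# torch.export produces an ExportedProgram holding the FX graph representation.
exported_model = export(model, (torch.randn(1, 3, 224, 224),))

# convert_model accepts the ExportedProgram directly.
ov_model = ov.convert_model(exported_model)

# Optionally serialize the converted model to OpenVINO IR.
ov.save_model(ov_model, "resnet18.xml")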
.. note::

This is an experimental feature. Use it only if you know that you need to. PyTorch version 2.2
is recommended. Dynamic shapes are not supported yet.

Converting a PyTorch Model from Disk
####################################

@@ -72,8 +72,15 @@ ov::pass::PositionIDsReplacerQwen::PositionIDsReplacerQwen(const Output<Node>& p

auto p_neg_const = wrap_type<v0::Constant>();
auto p_neg_mul = wrap_type<v1::Multiply>({p_current_len, p_neg_const});

// For now, it has always been a constant, but this may change in the future.
// If the model is in FP16, there will be a decompressing subgraph:
// i.e. Constant -> Convert -> Slice
//
// Also, although it hasn't been observed yet, there can theoretically be a
// dequantizing subgraph as well, so any_input() is used here.
auto p_rotary_emb_sincos = pattern::any_input();
// the rotary_emb_cos/rotary_emb_sin are sliced by the total length [1,..4096,1,128]
auto p_rotary_emb_sincos = wrap_type<v0::Constant>();
auto p_slice_1 = wrap_type<v8::Slice>({p_rotary_emb_sincos, _const(), p_opt_reshape, _const(), _const()});
auto p_slice_2 = wrap_type<v8::Slice>({p_slice_1, p_neg_mul, _const(), _const(), _const()});

@@ -29,7 +29,6 @@
#include "openvino/op/subtract.hpp"
#include "openvino/op/transpose.hpp"
#include "openvino/op/unsqueeze.hpp"
#include "openvino/pass/visualize_tree.hpp"
#include "transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp"
#include "transformations/sdpa_to_paged_attention/state_management_pattern.hpp"
#include "transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp"
@@ -186,17 +185,25 @@ class Qwen7bChatSDPA {

static std::shared_ptr<Node> gen_rope_emb_sin(const std::shared_ptr<Node>& total_seq_len,
const std::shared_ptr<Node>& neg_mul,
std::shared_ptr<Node>& head_size) {
auto sin = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
std::shared_ptr<Node>& head_size,
element::Type model_precision) {
auto sin = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
sin = makeOP<v0::Convert>({sin}, {dest_type_f32});
}
auto sliced_sin_by_total = makeOP<v8::Slice>({sin, {0}, total_seq_len, {1}, {1}});
auto rotary_emb_sin_shape = makeOP<v3::ShapeOf>({sliced_sin_by_total}, {{"output_type", "i64"}});
head_size = makeOP<v8::Gather>({rotary_emb_sin_shape, {3}, 0}, {{"batch_dims", 0}});
return makeOP<v8::Slice>({sliced_sin_by_total, neg_mul, {LLONG_MAX}, {1}, {1}});
}

static std::shared_ptr<Node> gen_rope_emb_cos(const std::shared_ptr<Node>& total_seq_len,
const std::shared_ptr<Node>& neg_mul) {
auto cos = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
const std::shared_ptr<Node>& neg_mul,
element::Type model_precision) {
auto cos = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
cos = makeOP<v0::Convert>({cos}, {dest_type_f32});
}
auto sliced_cos_by_total = makeOP<v8::Slice>({cos, {0}, total_seq_len, {1}, {1}});
return makeOP<v8::Slice>({sliced_cos_by_total, neg_mul, {LLONG_MAX}, {1}, {1}});
}
@@ -343,8 +350,12 @@ class Qwen7bChatPA {

static std::shared_ptr<Node> gen_rope_emb_sin(const std::shared_ptr<Node>& max_context_len,
const std::shared_ptr<Node>& position_ids,
std::shared_ptr<Node>& head_size) {
auto sin = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
std::shared_ptr<Node>& head_size,
element::Type model_precision) {
auto sin = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
sin = makeOP<v0::Convert>({sin}, {dest_type_f32});
}
auto slice_sin = makeOP<v8::Gather>({sin, position_ids, 1}, {{"batch_dims", 0}});

auto slice = makeOP<v8::Slice>({sin, {0}, max_context_len, {1}, {1}});
@@ -355,8 +366,12 @@ }
}

static std::shared_ptr<Node> gen_rope_emb_cos(const std::shared_ptr<Node>& max_context_len,
const std::shared_ptr<Node>& position_ids) {
auto cos = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
const std::shared_ptr<Node>& position_ids,
element::Type model_precision) {
auto cos = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
cos = makeOP<v0::Convert>({cos}, {dest_type_f32});
}
auto slice = makeOP<v8::Gather>({cos, position_ids, 1}, {{"batch_dims", 0}});
return makeOP<v1::Reshape>({slice, {-1, 1, 1, 128}}, {{"special_zero", false}});
}
@@ -425,7 +440,10 @@ class Qwen7bChatPA {

} // namespace

TEST_F(TransformationTestsF, SDPAToPA_Qwen) {
class SDPAToPATest : public TransformationTestsF, public ::testing::WithParamInterface<element::Type> {};

TEST_P(SDPAToPATest, SDPAToPA_Qwen7bChat_General) {
const auto model_precision = GetParam();
{
// Inputs to SDPA transformer:
auto beam_idx = makeOP<v0::Parameter>({}, {{"shape", PartialShape{DYN}}, el_type_i64});
@@ -455,8 +473,9 @@ TEST_F(TransformationTestsF, SDPAToPA_Qwen) {
// RoPE emb sin/cos init:
auto neg_cur_seq_len = Qwen7bChatSDPA::neg_mul(current_seq_len);
auto head_size = shared_ptr<Node>();
auto rope_emb_sin = Qwen7bChatSDPA::gen_rope_emb_sin(total_seq_len, neg_cur_seq_len, head_size);
auto rope_emb_cos = Qwen7bChatSDPA::gen_rope_emb_cos(total_seq_len, neg_cur_seq_len);
auto rope_emb_sin =
Qwen7bChatSDPA::gen_rope_emb_sin(total_seq_len, neg_cur_seq_len, head_size, model_precision);
auto rope_emb_cos = Qwen7bChatSDPA::gen_rope_emb_cos(total_seq_len, neg_cur_seq_len, model_precision);

// RoPE for Q,K inputs:
auto rope_q = Qwen7bChatSDPA::gen_rope(QKV::Q, qkv_proj, head_size, rope_emb_sin, rope_emb_cos);
@@ -515,8 +534,10 @@ TEST_F(TransformationTestsF, SDPAToPA_Qwen) {

// RoPE emb sin/cos init:
auto head_size = shared_ptr<Node>();
auto rope_emb_sin = Qwen7bChatPA::gen_rope_emb_sin(max_context_len_aligned, position_ids_aligned, head_size);
auto rope_emb_cos = Qwen7bChatPA::gen_rope_emb_cos(max_context_len_aligned, position_ids_aligned);
auto rope_emb_sin =
Qwen7bChatPA::gen_rope_emb_sin(max_context_len_aligned, position_ids_aligned, head_size, model_precision);
auto rope_emb_cos =
Qwen7bChatPA::gen_rope_emb_cos(max_context_len_aligned, position_ids_aligned, model_precision);

// rope Q, K:
auto rope_Q = Qwen7bChatPA::gen_rope(QKV::Q, qkv_proj, head_size, rope_emb_sin, rope_emb_cos);
@@ -564,7 +585,7 @@ TEST_F(TransformationTestsF, SDPAToPA_Qwen) {
disable_rt_info_check();
}

TEST_F(TransformationTestsF, SDPAToPA_TotalSequenceLengthPatternQwen) {
TEST_P(SDPAToPATest, SDPAToPA_Qwen7bChat_TotalSequenceLengthPattern) {
{
// Inputs to SDPA transformer:
auto beam_idx = makeOP<v0::Parameter>({}, {{"shape", PartialShape{DYN}}, el_type_i64});
@@ -632,7 +653,7 @@ static std::shared_ptr<ov::Node> make_param(const PartialShape& pshape,
// TODO: write a test for StateManagementPattern only (because changes for Alibi are inside it)
// TODO: align precisions, check the copying of "fuse_names" attr in SDPAToPagedAttention
// checking the graph structure and names, other checks are temporarily disabled:
TEST_F(TransformationTestsF, SDPAToPA_Baichuan2_13b_general_test) {
TEST_P(SDPAToPATest, SDPAToPA_Baichuan2_13b_General) {
{
auto beam_idx = make_param(PartialShape{DYN}, element::i32, "beam_idx");
auto position_ids = make_param(PartialShape{DYN, DYN}, element::i64, "position_ids");
@@ -881,4 +902,17 @@ TEST_F(TransformationTestsF, SDPAToPA_Baichuan2_13b_general_test) {
disable_result_friendly_names_check();
disable_rt_info_check();
}
}
}

/*
As there's often a need to cover specific model architectures in these
tests, please make sure you name the tests in the following manner:
SDPAToPA_MODELNAME_PATTERNYOUCOVER:
i.e. SDPAToPA_Qwen7bChat_TotalSequenceLengthPattern or
SDPAToPA_Baichuan2_13b_General if this is a test for the
entire SDPAToPA transformation
*/

const std::vector<ov::element::Type> element_types = {element::f16, element::f32};

INSTANTIATE_TEST_SUITE_P(SDPAToPATest_Conversion, SDPAToPATest, testing::ValuesIn(element_types));
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/CMakeLists.txt
@@ -239,7 +239,8 @@ ov_add_plugin(NAME ${TARGET_NAME}
AS_EXTENSION
VERSION_DEFINES_FOR src/plugin.cpp
SOURCES ${SOURCES} ${HEADERS}
ADD_CLANG_FORMAT)
ADD_CLANG_FORMAT
ADD_CLANG_TIDY)

# give a different file name depending on target platform architecture
if(ARM OR AARCH64)
83 changes: 83 additions & 0 deletions src/plugins/intel_cpu/src/.clang-tidy
@@ -0,0 +1,83 @@
---

### NOTE:
# The 'Checks: >' entry is a multiline string. Comments must not be moved into the string.
#
### Scopes to be enabled:
#
# cppcoreguidelines-*,
# google-*,
# readability-*,
# modernize-*,
# bugprone-*,
# misc-*,
#
### Checks that are turned off for a reason:
#
# -cppcoreguidelines-pro-bounds-pointer-arithmetic
# -google-readability-todo. No big reason to enforce
# -modernize-use-trailing-return-type. Just stylistic preference
# -readability-identifier-length. A lot of code use short names for readability, i.e. 'B' for batch
# -readability-uppercase-literal-suffix.
#
### Checks that are turned off but better be enabled later:
# -bugprone-narrowing-conversions
# -bugprone-easily-swappable-parameters
# -bugprone-fold-init-type
# -bugprone-implicit-widening-of-multiplication-result
# -cppcoreguidelines-narrowing-conversions
# -google-readability-braces-around-statements
# -readability-implicit-bool-conversion,
# -readability-magic-numbers, cppcoreguidelines-avoid-magic-numbers
# -readability-function-cognitive-complexity. Reasonable way to enforce splitting complex code into simple functions
# -modernize-concat-nested-namespaces. More compact way when C++17 is available

Checks: >
-*,
performance-*,
modernize-pass-by-value,
cppcoreguidelines-prefer-member-initializer,
-bugprone-easily-swappable-parameters,
-bugprone-fold-init-type,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-cppcoreguidelines-narrowing-conversions,
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
-google-build-using-namespace,
-google-readability-todo,
-readability-braces-around-statements,
-google-readability-braces-around-statements,
-modernize-use-trailing-return-type,
-readability-identifier-length,
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-cppcoreguidelines-avoid-magic-numbers,
-readability-uppercase-literal-suffix,
-readability-function-cognitive-complexity,
-modernize-concat-nested-namespaces,
# Treat warnings as errors
WarningsAsErrors: '*'
# Use clang-format for applied fixes
FormatStyle: file
HeaderFilterRegex: ''
CheckOptions:
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: true
# matches the corresponding cpplint check
- key: google-readability-namespace-comments.ShortNamespaceLines
value: "10"
# matches the corresponding cpplint check
- key: google-readability-namespace-comments.SpacesBeforeComments
value: "2"
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-pass-by-value.IncludeStyle
value: google
### To be considered to enable:
# # Unifies the usage of the statements
# - key: readability-braces-around-statements.ShortStatementLines
# value: "1"
# Reasonable way to enforce splitting complex code into simple functions
# - key: google-readability-function-size.StatementThreshold
# value: "800"
---