Merge branch 'master' into shape-infer/remove-cpu-custom-shape-infer-factories
praasz authored Jan 17, 2025
2 parents 95c02ac + 33eda4b commit b7a89aa
Showing 423 changed files with 2,047 additions and 1,706 deletions.
2 changes: 1 addition & 1 deletion .github/dockerfiles/docker_tag
@@ -1 +1 @@
pr-28380
pr-28040
10 changes: 7 additions & 3 deletions .github/dockerfiles/ov_build/ubuntu_22_04_x64_cc/Dockerfile
@@ -36,7 +36,11 @@ RUN apt-get update && \
# For Java API
default-jdk \
# Compiler \
clang \
clang-15 \
# Static analyzer
clang-tidy-15 \
# clang-tidy uses clang-format as a dependency
clang-format-15 \
&& \
rm -rf /var/lib/apt/lists/*

@@ -47,8 +51,8 @@ RUN chmod +x /install_build_dependencies.sh && \
rm -rf /var/lib/apt/lists/*

# Set clang as a default compiler
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/clang 100 && \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 100
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/clang-15 100 && \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++-15 100

# Install sccache
ARG SCCACHE_VERSION="v0.7.5"
5 changes: 4 additions & 1 deletion .github/workflows/linux_conditional_compilation.yml
@@ -151,14 +151,17 @@ jobs:
# Build
#

- name: CMake configure - CC COLLECT
- name: CMake configure - CC COLLECT with clang-tidy
# clang-tidy static analysis check is enabled as part of collection
# to avoid an additional separate build execution
run: |
cmake \
-G "${{ env.CMAKE_GENERATOR }}" \
-DCMAKE_CXX_STANDARD=20 \
-DBUILD_SHARED_LIBS=OFF \
-DENABLE_TESTS=ON \
-DENABLE_CPPLINT=OFF \
-DENABLE_CLANG_TIDY=ON \
-DENABLE_NCC_STYLE=OFF \
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON \
-DENABLE_PROFILING_ITT=ON \
@@ -305,6 +305,7 @@ include(python_requirements)

include(cpplint/cpplint)
include(clang_format/clang_format)
include(clang_tidy/clang_tidy)
include(ncc_naming_style/ncc_naming_style)

# Restore state
25 changes: 25 additions & 0 deletions cmake/developer_package/clang_tidy/clang_tidy.cmake
@@ -0,0 +1,25 @@
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#

if(ENABLE_CLANG_TIDY)
set(CLANG_TIDY_REQUIRED_VERSION 15 CACHE STRING "clang-tidy version to use")
set(CLANG_TIDY_FILENAME clang-tidy-${CLANG_TIDY_REQUIRED_VERSION} clang-tidy)
find_host_program(CLANG_TIDY NAMES ${CLANG_TIDY_FILENAME} PATHS ENV PATH)
if(CLANG_TIDY)
        execute_process(COMMAND ${CLANG_TIDY} --version OUTPUT_VARIABLE CLANG_VERSION)
if(NOT CLANG_VERSION)
message(WARNING "Supported clang-tidy version is ${CLANG_TIDY_REQUIRED_VERSION}!")
set(ENABLE_CLANG_TIDY OFF)
else()
string(REGEX REPLACE "[^0-9]+([0-9]+)\\..*" "\\1" CLANG_TIDY_MAJOR_VERSION ${CLANG_VERSION})
if(NOT CLANG_TIDY_MAJOR_VERSION EQUAL CLANG_TIDY_REQUIRED_VERSION)
message(WARNING "Supported clang-tidy version is ${CLANG_TIDY_REQUIRED_VERSION}! Provided version ${CLANG_TIDY_MAJOR_VERSION}")
set(ENABLE_CLANG_TIDY OFF)
endif()
endif()
else()
message(WARNING "Supported clang-tidy-${CLANG_TIDY_REQUIRED_VERSION} is not found!")
set(ENABLE_CLANG_TIDY OFF)
endif()
endif()
2 changes: 2 additions & 0 deletions cmake/developer_package/features.cmake
@@ -78,6 +78,8 @@ ov_dependent_option (ENABLE_CPPLINT_REPORT "Build cpplint report instead of fail

ov_option (ENABLE_CLANG_FORMAT "Enable clang-format checks during the build" ${STYLE_CHECKS_DEFAULT})

ov_option (ENABLE_CLANG_TIDY "Enable clang-tidy checks during the build" ${STYLE_CHECKS_DEFAULT})

ov_option (ENABLE_NCC_STYLE "Enable ncc style check" ${STYLE_CHECKS_DEFAULT})

ov_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF)
9 changes: 8 additions & 1 deletion cmake/developer_package/plugins/plugins.cmake
@@ -34,10 +34,11 @@ endif()
# [SKIP_INSTALL]
# [SKIP_REGISTRATION] Skip creation of <device>.xml
# [ADD_CLANG_FORMAT]
# [ADD_CLANG_TIDY]
# )
#
function(ov_add_plugin)
set(options SKIP_INSTALL PSEUDO_DEVICE ADD_CLANG_FORMAT AS_EXTENSION SKIP_REGISTRATION)
set(options SKIP_INSTALL PSEUDO_DEVICE ADD_CLANG_FORMAT ADD_CLANG_TIDY AS_EXTENSION SKIP_REGISTRATION)
set(oneValueArgs NAME DEVICE_NAME VERSION_DEFINES_FOR PSEUDO_PLUGIN_FOR)
set(multiValueArgs DEFAULT_CONFIG SOURCES OBJECT_LIBRARIES CPPLINT_FILTERS)
cmake_parse_arguments(OV_PLUGIN "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -105,6 +106,12 @@ function(ov_add_plugin)
string(CONCAT custom_filter "${custom_filter}" "," "${filter}")
endforeach()

if (OV_PLUGIN_ADD_CLANG_TIDY)
if (ENABLE_CLANG_TIDY)
set_target_properties(${OV_PLUGIN_NAME} PROPERTIES CXX_CLANG_TIDY clang-tidy-${CLANG_TIDY_REQUIRED_VERSION})
endif()
endif()

if (OV_PLUGIN_ADD_CLANG_FORMAT)
ov_add_clang_format_target(${OV_PLUGIN_NAME}_clang FOR_SOURCES ${OV_PLUGIN_SOURCES})
else()
@@ -179,7 +179,7 @@ It is recommended to address model outputs by the index rather than the name.
Support for torch.export
########################

`torch.export <https://pytorch.org/docs/2.2/export.html>`__ is the current way to get a graph
`torch.export <https://pytorch.org/docs/stable/export.html>`__ is the current way to get a graph
representation of a model (since PyTorch 2.1). It produces ``ExportedProgram`` which includes
the graph representation in the FX format. To see why it has an advantage over the TorchScript
representation, refer to `PyTorch documentation <https://pytorch.org/docs/stable/fx.html>`__.
@@ -198,11 +198,6 @@ Here is an example of how to convert a model obtained with ``torch.export``:
exported_model = export(model, (torch.randn(1, 3, 224, 224),))
ov_model = convert_model(exported_model)
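For reference, a more complete, self-contained sketch of the same flow (the ResNet-18 model from ``torchvision`` is used here purely as a hypothetical example; only ``torch.export.export`` and ``openvino.convert_model`` are part of the documented API):

import torch
import torchvision
import openvino as ov
from torch.export import export

# Any torch.nn.Module works here; ResNet-18 is just an illustrative stand-in.
model = torchvision.models.resnet18(weights=None).eval()

# torch.export produces an ExportedProgram holding the FX graph representation.
exported_model = export(model, (torch.randn(1, 3, 224, 224),))

# convert_model accepts the ExportedProgram directly.
ov_model = ov.convert_model(exported_model)

# Optionally serialize the converted model to OpenVINO IR.
ov.save_model(ov_model, "resnet18.xml")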
.. note::

This is an experimental feature. Use it only if you know that you need to. PyTorch version 2.2
is recommended. Dynamic shapes are not supported yet.

Converting a PyTorch Model from Disk
####################################

@@ -72,8 +72,15 @@ ov::pass::PositionIDsReplacerQwen::PositionIDsReplacerQwen(const Output<Node>& p

auto p_neg_const = wrap_type<v0::Constant>();
auto p_neg_mul = wrap_type<v1::Multiply>({p_current_len, p_neg_const});

// For now, it has always been a constant, but this may change in the future.
// If the model is in FP16, there will be a decompressing subgraph:
// i.e. Constant -> Convert -> Slice
//
// Also, although it hasn't been observed yet, there can theoretically be a
// dequantizing subgraph as well, so any_input() is used here.
auto p_rotary_emb_sincos = pattern::any_input();
// the rotary_emb_cos/rotary_emb_sin are sliced by the total length [1,..4096,1,128]
auto p_rotary_emb_sincos = wrap_type<v0::Constant>();
auto p_slice_1 = wrap_type<v8::Slice>({p_rotary_emb_sincos, _const(), p_opt_reshape, _const(), _const()});
auto p_slice_2 = wrap_type<v8::Slice>({p_slice_1, p_neg_mul, _const(), _const(), _const()});

@@ -29,7 +29,6 @@
#include "openvino/op/subtract.hpp"
#include "openvino/op/transpose.hpp"
#include "openvino/op/unsqueeze.hpp"
#include "openvino/pass/visualize_tree.hpp"
#include "transformations/sdpa_to_paged_attention/prev_sequence_length_pattern.hpp"
#include "transformations/sdpa_to_paged_attention/state_management_pattern.hpp"
#include "transformations/sdpa_to_paged_attention/total_sequence_length_pattern.hpp"
@@ -186,17 +185,25 @@ class Qwen7bChatSDPA {

static std::shared_ptr<Node> gen_rope_emb_sin(const std::shared_ptr<Node>& total_seq_len,
const std::shared_ptr<Node>& neg_mul,
std::shared_ptr<Node>& head_size) {
auto sin = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
std::shared_ptr<Node>& head_size,
element::Type model_precision) {
auto sin = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
sin = makeOP<v0::Convert>({sin}, {dest_type_f32});
}
auto sliced_sin_by_total = makeOP<v8::Slice>({sin, {0}, total_seq_len, {1}, {1}});
auto rotary_emb_sin_shape = makeOP<v3::ShapeOf>({sliced_sin_by_total}, {{"output_type", "i64"}});
head_size = makeOP<v8::Gather>({rotary_emb_sin_shape, {3}, 0}, {{"batch_dims", 0}});
return makeOP<v8::Slice>({sliced_sin_by_total, neg_mul, {LLONG_MAX}, {1}, {1}});
}

static std::shared_ptr<Node> gen_rope_emb_cos(const std::shared_ptr<Node>& total_seq_len,
const std::shared_ptr<Node>& neg_mul) {
auto cos = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
const std::shared_ptr<Node>& neg_mul,
element::Type model_precision) {
auto cos = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
cos = makeOP<v0::Convert>({cos}, {dest_type_f32});
}
auto sliced_cos_by_total = makeOP<v8::Slice>({cos, {0}, total_seq_len, {1}, {1}});
return makeOP<v8::Slice>({sliced_cos_by_total, neg_mul, {LLONG_MAX}, {1}, {1}});
}
@@ -343,8 +350,12 @@ class Qwen7bChatPA {

static std::shared_ptr<Node> gen_rope_emb_sin(const std::shared_ptr<Node>& max_context_len,
const std::shared_ptr<Node>& position_ids,
std::shared_ptr<Node>& head_size) {
auto sin = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
std::shared_ptr<Node>& head_size,
element::Type model_precision) {
auto sin = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
sin = makeOP<v0::Convert>({sin}, {dest_type_f32});
}
auto slice_sin = makeOP<v8::Gather>({sin, position_ids, 1}, {{"batch_dims", 0}});

auto slice = makeOP<v8::Slice>({sin, {0}, max_context_len, {1}, {1}});
@@ -355,8 +366,12 @@ }
}

static std::shared_ptr<Node> gen_rope_emb_cos(const std::shared_ptr<Node>& max_context_len,
const std::shared_ptr<Node>& position_ids) {
auto cos = makeConst(element::f32, {1, 4096, 1, 128}, MOCK_VALUE);
const std::shared_ptr<Node>& position_ids,
element::Type model_precision) {
auto cos = makeConst(model_precision, {1, 4096, 1, 128}, MOCK_VALUE);
if (model_precision != element::f32) {
cos = makeOP<v0::Convert>({cos}, {dest_type_f32});
}
auto slice = makeOP<v8::Gather>({cos, position_ids, 1}, {{"batch_dims", 0}});
return makeOP<v1::Reshape>({slice, {-1, 1, 1, 128}}, {{"special_zero", false}});
}
@@ -425,7 +440,10 @@ class Qwen7bChatPA {

} // namespace

TEST_F(TransformationTestsF, SDPAToPA_Qwen) {
class SDPAToPATest : public TransformationTestsF, public ::testing::WithParamInterface<element::Type> {};

TEST_P(SDPAToPATest, SDPAToPA_Qwen7bChat_General) {
const auto model_precision = GetParam();
{
// Inputs to SDPA transformer:
auto beam_idx = makeOP<v0::Parameter>({}, {{"shape", PartialShape{DYN}}, el_type_i64});
@@ -455,8 +473,9 @@ TEST_F(TransformationTestsF, SDPAToPA_Qwen) {
// RoPE emb sin/cos init:
auto neg_cur_seq_len = Qwen7bChatSDPA::neg_mul(current_seq_len);
auto head_size = shared_ptr<Node>();
auto rope_emb_sin = Qwen7bChatSDPA::gen_rope_emb_sin(total_seq_len, neg_cur_seq_len, head_size);
auto rope_emb_cos = Qwen7bChatSDPA::gen_rope_emb_cos(total_seq_len, neg_cur_seq_len);
auto rope_emb_sin =
Qwen7bChatSDPA::gen_rope_emb_sin(total_seq_len, neg_cur_seq_len, head_size, model_precision);
auto rope_emb_cos = Qwen7bChatSDPA::gen_rope_emb_cos(total_seq_len, neg_cur_seq_len, model_precision);

// RoPE for Q,K inputs:
auto rope_q = Qwen7bChatSDPA::gen_rope(QKV::Q, qkv_proj, head_size, rope_emb_sin, rope_emb_cos);
@@ -515,8 +534,10 @@ TEST_F(TransformationTestsF, SDPAToPA_Qwen) {

// RoPE emb sin/cos init:
auto head_size = shared_ptr<Node>();
auto rope_emb_sin = Qwen7bChatPA::gen_rope_emb_sin(max_context_len_aligned, position_ids_aligned, head_size);
auto rope_emb_cos = Qwen7bChatPA::gen_rope_emb_cos(max_context_len_aligned, position_ids_aligned);
auto rope_emb_sin =
Qwen7bChatPA::gen_rope_emb_sin(max_context_len_aligned, position_ids_aligned, head_size, model_precision);
auto rope_emb_cos =
Qwen7bChatPA::gen_rope_emb_cos(max_context_len_aligned, position_ids_aligned, model_precision);

// rope Q, K:
auto rope_Q = Qwen7bChatPA::gen_rope(QKV::Q, qkv_proj, head_size, rope_emb_sin, rope_emb_cos);
@@ -564,7 +585,7 @@ TEST_F(TransformationTestsF, SDPAToPA_Qwen) {
disable_rt_info_check();
}

TEST_F(TransformationTestsF, SDPAToPA_TotalSequenceLengthPatternQwen) {
TEST_P(SDPAToPATest, SDPAToPA_Qwen7bChat_TotalSequenceLengthPattern) {
{
// Inputs to SDPA transformer:
auto beam_idx = makeOP<v0::Parameter>({}, {{"shape", PartialShape{DYN}}, el_type_i64});
@@ -632,7 +653,7 @@ static std::shared_ptr<ov::Node> make_param(const PartialShape& pshape,
// TODO: write a test for StateManagementPattern only (because changes for Alibi are inside it)
// TODO: align precisions, check the copying of "fuse_names" attr in SDPAToPagedAttention
// checking the graph structure and names, other checks are temporarily disabled:
TEST_F(TransformationTestsF, SDPAToPA_Baichuan2_13b_general_test) {
TEST_P(SDPAToPATest, SDPAToPA_Baichuan2_13b_General) {
{
auto beam_idx = make_param(PartialShape{DYN}, element::i32, "beam_idx");
auto position_ids = make_param(PartialShape{DYN, DYN}, element::i64, "position_ids");
@@ -881,4 +902,17 @@ TEST_F(TransformationTestsF, SDPAToPA_Baichuan2_13b_general_test) {
disable_result_friendly_names_check();
disable_rt_info_check();
}
}
}

/*
As there's often a need to cover specific model architectures in these
tests, please make sure you name the tests in the following manner:
SDPAToPA_MODELNAME_PATTERNYOUCOVER:
i.e. SDPAToPA_Qwen7bChat_TotalSequenceLengthPattern or
SDPAToPA_Baichuan2_13b_General if this is a test for the
entire SDPAToPA transformation
*/

const std::vector<ov::element::Type> element_types = {element::f16, element::f32};

INSTANTIATE_TEST_SUITE_P(SDPAToPATest_Conversion, SDPAToPATest, testing::ValuesIn(element_types));
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/CMakeLists.txt
@@ -239,7 +239,8 @@ ov_add_plugin(NAME ${TARGET_NAME}
AS_EXTENSION
VERSION_DEFINES_FOR src/plugin.cpp
SOURCES ${SOURCES} ${HEADERS}
ADD_CLANG_FORMAT)
ADD_CLANG_FORMAT
ADD_CLANG_TIDY)

# give a different file name depending on target platform architecture
if(ARM OR AARCH64)
83 changes: 83 additions & 0 deletions src/plugins/intel_cpu/src/.clang-tidy
@@ -0,0 +1,83 @@
---

### NOTE:
# The 'Checks: >' entry is a multiline string. Comments must not be moved into the string.
#
### Scopes to be enabled:
#
# cppcoreguidelines-*,
# google-*,
# readability-*,
# modernize-*,
# bugprone-*,
# misc-*,
#
### Checks that are turned off for a reason:
#
# -cppcoreguidelines-pro-bounds-pointer-arithmetic
# -google-readability-todo. No big reason to enforce
# -modernize-use-trailing-return-type. Just stylistic preference
# -readability-identifier-length. A lot of code use short names for readability, i.e. 'B' for batch
# -readability-uppercase-literal-suffix.
#
### Checks that are turned off but better be enabled later:
# -bugprone-narrowing-conversions
# -bugprone-easily-swappable-parameters
# -bugprone-fold-init-type
# -bugprone-implicit-widening-of-multiplication-result
# -cppcoreguidelines-narrowing-conversions
# -google-readability-braces-around-statements
# -readability-implicit-bool-conversion,
# -readability-magic-numbers, cppcoreguidelines-avoid-magic-numbers
# -readability-function-cognitive-complexity. Reasonable way to enforce splitting complex code into simple functions
# -modernize-concat-nested-namespaces. More compact way when C++17 is available

Checks: >
-*,
performance-*,
modernize-pass-by-value,
cppcoreguidelines-prefer-member-initializer,
-bugprone-easily-swappable-parameters,
-bugprone-fold-init-type,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-cppcoreguidelines-narrowing-conversions,
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
-google-build-using-namespace,
-google-readability-todo,
-readability-braces-around-statements,
-google-readability-braces-around-statements,
-modernize-use-trailing-return-type,
-readability-identifier-length,
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-cppcoreguidelines-avoid-magic-numbers,
-readability-uppercase-literal-suffix,
-readability-function-cognitive-complexity,
-modernize-concat-nested-namespaces,
# Treat warnings as errors
WarningsAsErrors: '*'
# Use clang-format for applied fixes
FormatStyle: file
HeaderFilterRegex: ''
CheckOptions:
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: true
# matches the corresponding cpplint check
- key: google-readability-namespace-comments.ShortNamespaceLines
value: "10"
# matches the corresponding cpplint check
- key: google-readability-namespace-comments.SpacesBeforeComments
value: "2"
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-pass-by-value.IncludeStyle
value: google
### To be considered to enable:
# # Unifies the usage of the statements
# - key: readability-braces-around-statements.ShortStatementLines
# value: "1"
# Reasonable way to enforce splitting complex code into simple functions
# - key: google-readability-function-size.StatementThreshold
# value: "800"
---