From 2e0c4762c628ada1fddc7bd72afd425496e606bb Mon Sep 17 00:00:00 2001
From: chenhuwa
Date: Wed, 28 Feb 2024 16:03:34 +0800
Subject: [PATCH] apply comments and more update

---
 .../include/snippets/pass/gn_tokenization.hpp |  3 +-
 src/common/snippets/src/op/reshape.cpp        |  6 +-
 src/common/snippets/src/op/subgraph.cpp       | 13 +++-
 .../snippets/src/pass/gn_decomposition.cpp    | 27 ++++++--
 .../snippets/src/pass/gn_tokenization.cpp     |  4 +-
 .../shape_inference/shape_infer_instances.cpp | 10 ++-
 .../src/shape_inference/shape_inference.cpp   |  3 +-
 src/plugins/intel_cpu/src/nodes/subgraph.cpp  |  6 ++
 .../transformation_pipeline.cpp               |  9 ++-
 .../group_normalization.cpp                   | 61 ++++++++++++-------
 .../skip_tests_config.cpp                     |  2 +
 .../single_op/group_normalization.hpp         |  2 +-
 12 files changed, 102 insertions(+), 44 deletions(-)

diff --git a/src/common/snippets/include/snippets/pass/gn_tokenization.hpp b/src/common/snippets/include/snippets/pass/gn_tokenization.hpp
index 4ea39b391b4d05..220f05f0bbbc88 100644
--- a/src/common/snippets/include/snippets/pass/gn_tokenization.hpp
+++ b/src/common/snippets/include/snippets/pass/gn_tokenization.hpp
@@ -4,7 +4,6 @@
 
 #pragma once
 
-#include "openvino/pass/graph_rewrite.hpp"
 #include "openvino/pass/pattern/matcher.hpp"
 #include "snippets/pass/tokenization.hpp"
 
@@ -17,7 +16,7 @@ namespace pass {
  * @brief Tokenize GroupNormalization to a subgraph
  * @ingroup snippets
  */
-class TokenizeGNSnippets: public ov::pass::MatcherPass {
+class TokenizeGNSnippets : public ov::pass::MatcherPass {
 public:
     OPENVINO_RTTI("TokenizeGNSnippets", "0");
     TokenizeGNSnippets();
diff --git a/src/common/snippets/src/op/reshape.cpp b/src/common/snippets/src/op/reshape.cpp
index 308a13c6f3e8c0..65927f2ee4e2bf 100644
--- a/src/common/snippets/src/op/reshape.cpp
+++ b/src/common/snippets/src/op/reshape.cpp
@@ -12,7 +12,7 @@ namespace ov {
 namespace snippets {
 namespace op {
 Reshape::Reshape(const Output<Node>& arg, ov::PartialShape target_shape)
-    : Op({arg}), m_target_shape(target_shape) {
+    : Op({arg}), m_target_shape(std::move(target_shape)) {
     constructor_validate_and_infer_types();
 }
@@ -23,7 +23,7 @@ void Reshape::validate_and_infer_types() {
 std::shared_ptr<Node> Reshape::clone_with_new_inputs(const OutputVector& new_args) const {
     INTERNAL_OP_SCOPE(Reshape);
     check_new_args_count(this, new_args);
-    return std::make_shared<Reshape>(new_args.at(0), get_target_shape());
+    return std::make_shared<Reshape>(new_args.at(0), m_target_shape);
 }
 
 bool Reshape::visit_attributes(AttributeVisitor& visitor) {
@@ -36,7 +36,7 @@ const ov::PartialShape& Reshape::get_target_shape() const {
 }
 
 void Reshape::set_target_shape(ov::PartialShape shape) {
-    m_target_shape = shape;
+    m_target_shape = std::move(shape);
 }
 }// namespace op
 }// namespace snippets
diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp
index 92b76084996927..df4fc1693590f7 100644
--- a/src/common/snippets/src/op/subgraph.cpp
+++ b/src/common/snippets/src/op/subgraph.cpp
@@ -57,6 +57,8 @@
 #include
 #include
 
+#include "snippets/lowered/pass/serialize_control_flow.hpp"
+
 using namespace std;
 using namespace ov::op::util;
@@ -321,7 +323,7 @@ VectorDims Subgraph::infer_master_shape() {
         OPENVINO_ASSERT(!output_dims.empty(), "Can't calculate master_shape before the first shape inference");
     } else {
         for (const auto& res : body_ptr()->get_results()) {
+            auto reshape = ov::as_type_ptr<op::Reshape>(res->get_input_node_shared_ptr(0));
+            auto res_input = reshape ? reshape->input(0) : res->input(0);
             OPENVINO_ASSERT(res_input.get_partial_shape().is_static(), "Result has dynamic shape in static pipeline");
             // We need to account for the shape's layout stored in Output rt_info
@@ -386,9 +388,10 @@ void Subgraph::data_flow_transformations(const BlockedShapeVector& blocked_input
     OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::data_flow_transformations")
 
     ov::snippets::pass::Manager manager;
-    // GroupNormalizationDecomposition should be before canonicalization(rankNorm) as scale/bias shape is C and need special process.
+    // GNDecomposition should run before canonicalization (rankNorm), as the scale/bias shape is C and needs special processing.
     if (config.m_has_domain_sensitive_ops)
         manager.register_pass<snippets::pass::GNDecomposition>();
+
     if (!blocked_input_shapes.empty())
         manager.register_pass<snippets::pass::Canonicalization>(blocked_input_shapes);
     if (!input_precisions.empty() && !output_precisions.empty())
@@ -411,6 +414,12 @@ void Subgraph::data_flow_transformations(const BlockedShapeVector& blocked_input
 
     manager.register_positioned_passes(backend_passes);
     manager.run_passes(body_ptr());
+
+    // ov::pass::Manager magr;
+    // std::string xmlo = "data_flow.xml";
+    // std::string bino = "data_flow.bin";
+    // magr.register_pass<ov::pass::Serialize>(xmlo, bino);
+    // magr.run_passes(body_ptr());
 }
 
 void Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir,
diff --git a/src/common/snippets/src/pass/gn_decomposition.cpp b/src/common/snippets/src/pass/gn_decomposition.cpp
index 0beeba52992c4c..ea5da94483130f 100644
--- a/src/common/snippets/src/pass/gn_decomposition.cpp
+++ b/src/common/snippets/src/pass/gn_decomposition.cpp
@@ -34,11 +34,11 @@ GNDecomposition::GNDecomposition() {
         const auto num_groups = static_cast<size_t>(group_norm_node->get_num_groups());
         const float eps = static_cast<float>(group_norm_node->get_epsilon());
 
-        ////////////collapse to reduce lastDim to avoid nested loop overhead(reduce tails process)///////////
+        ////////////collapse to reduce lastDim to avoid nested loop overhead (e.g. reduce tails in the inner loop)///////////
         // reshape [N, C, spatial] to [N, group, 1, (C / group) * spatial]
         const auto orig_shape = group_norm_node->get_input_partial_shape(0);
         size_t orig_rank = orig_shape.rank().get_length();
         size_t group_rank = 4;
         std::vector<Dimension> group_dims(group_rank);
         group_dims[0] = orig_shape[0];
         group_dims[1] = Dimension(num_groups);
@@ -50,12 +50,17 @@ GNDecomposition::GNDecomposition() {
         }
         group_dims[3] = group_dims[3] * spatial_dim;
         ov::PartialShape group_shape(group_dims);
-        std::shared_ptr<ov::Node> reshaped_node = std::make_shared<op::Reshape>(data, group_shape);
+        std::shared_ptr<ov::Node> reshaped_node_orig = std::make_shared<op::Reshape>(data, group_shape);
+
+        std::shared_ptr<ov::Node> reshaped_node1 = reshaped_node_orig;
+        if (data.get_element_type() != element::f32) {
+            reshaped_node1 = std::make_shared<op::ConvertSaturation>(reshaped_node_orig, element::f32);
+        }
 
         // reduceSum on dimension [C / group * spatial]
         std::vector<int64_t> axis(1, 3);
         auto axis_node = std::make_shared<ov::op::v0::Constant>(element::i64, Shape{axis.size()}, axis);
-        const auto reduce_sum = std::make_shared<ov::op::v1::ReduceSum>(reshaped_node, axis_node, true);
+        const auto reduce_sum = std::make_shared<ov::op::v1::ReduceSum>(reshaped_node1, axis_node, true);
 
         // reduceMean
         auto group_shape_static = group_shape.to_shape();
         float group_size_inv = 1.0f / static_cast<float>(group_shape_static[3]);
         const auto group_size_inv_node = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{}, std::vector<float>{group_size_inv});
@@ -64,7 +69,11 @@ GNDecomposition::GNDecomposition() {
         const auto reduce_mean = std::make_shared<ov::op::v1::Multiply>(reduce_sum, group_size_inv_node);
 
         // x - mean
-        auto sub_mean = std::make_shared<ov::op::v1::Subtract>(reshaped_node, reduce_mean);
+        std::shared_ptr<ov::Node> reshaped_node2 = reshaped_node_orig;
+        if (data.get_element_type() != element::f32) {
+            reshaped_node2 = std::make_shared<op::ConvertSaturation>(reshaped_node_orig, element::f32);
+        }
+        auto sub_mean = std::make_shared<ov::op::v1::Subtract>(reshaped_node2, reduce_mean);
         // (x - mean) ^ 2
         auto sqr_const = std::make_shared<ov::op::v0::Constant>(element::f32, Shape{1}, std::vector<float>{2});
         auto sqr = std::make_shared<ov::op::v1::Power>(sub_mean, sqr_const);
@@ -110,8 +119,14 @@ GNDecomposition::GNDecomposition() {
         auto scaled_node = std::make_shared<ov::op::v1::Multiply>(mvn_reshaped, reshape_scale);
         auto biased_node = std::make_shared<ov::op::v1::Add>(scaled_node, reshape_bias);
 
+        auto result_prec = group_norm_node->get_output_element_type(0);
+        std::shared_ptr<ov::Node> biased_node_convert = biased_node;
+        if (result_prec != element::f32) {
+            biased_node_convert = std::make_shared<op::ConvertSaturation>(biased_node, result_prec);
+        }
+
         // reshape_back [N, group, C / group, spatial] to [N, C, spatial]
-        const auto reshape_back_node = std::make_shared<op::Reshape>(biased_node, orig_shape);
+        const auto reshape_back_node = std::make_shared<op::Reshape>(biased_node_convert, orig_shape);
 
         std::vector<size_t> subtensor(group_rank, 1);
         subtensor[3] = PortDescriptor::ServiceDimensions::FULL_DIM;
diff --git a/src/common/snippets/src/pass/gn_tokenization.cpp b/src/common/snippets/src/pass/gn_tokenization.cpp
index 23a97dc657f81c..4332d4d44d66e0 100644
--- a/src/common/snippets/src/pass/gn_tokenization.cpp
+++ b/src/common/snippets/src/pass/gn_tokenization.cpp
@@ -3,6 +3,7 @@
 //
 
 #include "snippets/pass/gn_tokenization.hpp"
+#include "snippets/pass/collapse_subgraph.hpp"
 
 #include "snippets/itt.hpp"
 #include "snippets/op/subgraph.hpp"
@@ -19,7 +20,8 @@ ov::snippets::pass::TokenizeGNSnippets::TokenizeGNSnippets() {
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
         OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::pass::TokenizeGNSnippets")
         auto group_norm_node = ov::as_type_ptr<ov::op::v12::GroupNormalization>(m.get_match_root());
-        if (group_norm_node->is_dynamic())
+        if (group_norm_node->is_dynamic() ||
+            TokenizeSnippets::get_supported_element_types().count(group_norm_node->get_element_type()) == 0)
             return false;
 
         auto subgraph = op::Subgraph::wrap_node_as_subgraph(group_norm_node);
diff --git a/src/common/snippets/src/shape_inference/shape_infer_instances.cpp b/src/common/snippets/src/shape_inference/shape_infer_instances.cpp
index c00b3ceda0e0bf..ba7ebe082a6fe4 100644
--- a/src/common/snippets/src/shape_inference/shape_infer_instances.cpp
+++ b/src/common/snippets/src/shape_inference/shape_infer_instances.cpp
@@ -252,9 +252,15 @@ ReshapeShapeInfer::ReshapeShapeInfer(const std::shared_ptr<Node>& n) {
 }
 
 Result ReshapeShapeInfer::infer(const std::vector<VectorDimsRef>& input_shapes) {
-    OPENVINO_ASSERT(input_shapes.size() == 1, "Invalid number of shapes passed ReshapeShapeInfer");
+    OPENVINO_ASSERT(input_shapes.size() == 1, "Invalid number of shapes passed to ReshapeShapeInfer");
+    OPENVINO_ASSERT(target_shape.is_static(), "target_shape should be static in ReshapeShapeInfer");
     VectorDims result_shape = target_shape.get_shape();
-    // todo: check static and size is the same
+    const auto input_elems =
+        std::accumulate(input_shapes[0].get().begin(), input_shapes[0].get().end(), static_cast<size_t>(1), std::multiplies<size_t>());
+    const auto output_elems =
+        std::accumulate(result_shape.begin(), result_shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
+    OPENVINO_ASSERT(input_elems == output_elems, "Tensor volume should be the same after reshape in ReshapeShapeInfer");
+
     return {{result_shape}, ShapeInferStatus::success};
 }
diff --git a/src/common/snippets/src/shape_inference/shape_inference.cpp b/src/common/snippets/src/shape_inference/shape_inference.cpp
index 94749d178837c0..d6c6081113ea1f 100644
--- a/src/common/snippets/src/shape_inference/shape_inference.cpp
+++ b/src/common/snippets/src/shape_inference/shape_inference.cpp
@@ -60,6 +60,7 @@ const IShapeInferSnippetsFactory::TRegistry IShapeInferSnippetsFactory::registry
     SHAPE_INFER_PREDEFINED(op::KernelStatic, EmptyShapeInfer),
     SHAPE_INFER_PREDEFINED(op::KernelDynamic, EmptyShapeInfer),
     SHAPE_INFER_PREDEFINED(op::Nop, EmptyShapeInfer),
+    SHAPE_INFER_OP_SPECIFIC_EXTERNAL(op::Reshape, ReshapeShapeInfer),
     SHAPE_INFER_OP_SPECIFIC_EXTERNAL(opset1::Select, SelectShapeInfer),
     SHAPE_INFER_OP_SPECIFIC_EXTERNAL(op::Brgemm, BrgemmShapeInfer),
     SHAPE_INFER_OP_SPECIFIC_EXTERNAL(op::ReduceMax, ReduceShapeInfer),
@@ -89,8 +90,6 @@ std::shared_ptr<IShapeInferSnippets> make_shape_inference(const std::shared_ptr<
                ov::is_type(op) ||
                ov::is_type(op)) {
         return std::make_shared<EmptyShapeInfer>();
-    } else if (ov::is_type<op::Reshape>(op)) {
-        return std::make_shared<ReshapeShapeInfer>(op);
     } else {
         OPENVINO_THROW("Operation type " + std::string(op->get_type_info().name) + " is not supported in Snippets shape inference pipeline");
     }
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
index f7a50ffa14852f..623d7ae247f4a7 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -641,6 +641,12 @@ void Snippet::SnippetJitExecutor::generate(const jit_snippets_compile_args* jcp)
     SNIPPETS_REGISTER_PASS_RELATIVE(Place::After, ov::intel_cpu::pass::FuseLoadStoreConvert,
                                     ov::intel_cpu::pass::SetBrgemmCopyBBuffersShape);
 
+    // ov::pass::Manager magr;
+    // std::string xmlo = "original.xml";
+    // std::string bino = "original.bin";
+    // magr.register_pass<ov::pass::Serialize>(xmlo, bino);
+    // magr.run_passes(snippetAttrs.snippet->body_ptr());
+
     schedule = snippetAttrs.snippet->generate_from_linear_ir(std::make_shared<CPUShapeInferSnippetsFactory>(),
                                                              backend_passes,
                                                              reinterpret_cast<const void*>(jcp));
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index 8d28165b924dd5..de25836e4b0417 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -67,7 +67,7 @@
 #include "transformations/op_conversions/hswish_decomposition.hpp"
 #include "transformations/op_conversions/gru_cell_decomposition.hpp"
 #include "transformations/op_conversions/lstm_cell_decomposition.hpp"
-#include "transformations/op_conversions/gn_decomposition.hpp"
+#include "transformations/op_conversions/group_normalization_decomposition.hpp"
 #include "transformations/op_conversions/mvn6_decomposition.hpp"
 #include "transformations/op_conversions/normalize_l2_decomposition.hpp"
 #include "transformations/op_conversions/reduce_l1_decomposition.hpp"
@@ -471,11 +471,14 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
         },
         ov::pass::NormalizeL2Decomposition);
 
+    // todo: only f32 is supported in the first version
     CPU_SET_CALLBACK_X64(manager,
         [](const_node_ptr &node) -> bool {
-            return !node->is_dynamic() && node->get_output_element_type(0) == element::f32;
+            return !node->is_dynamic() &&
+                   ov::snippets::pass::TokenizeSnippets::get_supported_element_types().count(node->get_element_type()) != 0;
         },
-        ov::pass::GNDecomposition);
+        ov::pass::GroupNormalizationDecomposition);
+
     CPU_ENABLE_PASS_COMMON(manager, ov::pass::SoftmaxDecomposition);
     CPU_SET_CALLBACK_COMMON(manager,
         [](const_node_ptr &node) -> bool {
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp
index 68f573383c00d1..df2416102e450b 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp
@@ -1,33 +1,37 @@
-// Copyright (C) 2023 Intel Corporation
+// Copyright (C) 2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "single_layer_tests/group_normalization.hpp"
-
-using namespace ov::test::subgraph;
+#include "single_op_tests/group_normalization.hpp"
 
 namespace {
+using ov::test::GroupNormalizationTest;
 
 const std::vector<ov::element::Type> netPrecisions = {
     ov::element::f32,
-    ov::element::bf16,
-    ov::element::i8
+    // ov::element::bf16,  // remove specific merge convert
+    // ov::element::i8     // ref impl does not support int8 precision
 };
 
+// static shapes
+const std::vector<ov::Shape> staticInputShapes = {
+    {3, 8, 3},
+    {3, 8, 8},
+    {3, 8, 16},
+    {3, 8, 21},
+    {1, 4, 8, 8},
+    {1, 8, 1, 22},
+    {3, 16, 1, 33},
+    {1, 4, 1, 1, 34},
+    {1, 8, 1, 8, 2, 2},
+    {1, 8, 1, 8, 2, 2, 2},
+};
+
-const std::vector<InputShape> inputShapes = {
-    // static shape
-    {{1, 4, 1, 8}, {{1, 4, 1, 8}}},
-    {{3, 8, 2, 32}, {{3, 8, 2, 32}}},
-    {{3, 8, 16, 8, 4}, {{3, 8, 16, 8, 4}}},
-    {{3, 8, 16, 8, 64}, {{3, 8, 16, 8, 64}}},
-    {{3, 8, 16, 100, 4}, {{3, 8, 16, 100, 4}}},
-    {{3, 16, 16, 8, 4}, {{3, 16, 16, 8, 4}}},
-    {{1, 8, 8}, {{1, 8, 8}}},
-    {{1, 8, 1, 8, 2}, {{1, 8, 1, 8, 2}}},
-    {{1, 8, 1, 8, 2, 2}, {{1, 8, 1, 8, 2, 2}}},
-    {{1, 8, 1, 8, 2, 2, 2}, {{1, 8, 1, 8, 2, 2, 2}}},
-    // dynmaic shape
+// dynamic shapes
+const std::vector<ov::test::InputShape> dynamicInputShapes = {
+    {{-1, -1, -1}, {{1, 8, 22}, {2, 4, 7}, {1, 8, 22}}},
     {{-1, -1, -1, -1}, {{1, 16, 8, 8}, {2, 8, 4, 4}, {1, 16, 8, 8}}},
-    {{{1, 4}, 16, -1, -1}, {{1, 16, 6, 6}, {4, 16, 10, 10}, {1, 16, 6, 6}}}
+    {{{1, 4}, {4, 16}, -1, -1}, {{1, 4, 6, 6}, {4, 16, 10, 10}, {1, 4, 6, 6}}},
+    {{-1, -1, -1, -1, -1}, {{1, 16, 7, 7, 1}, {2, 8, 4, 4, 1}, {1, 16, 7, 7, 1}}},
 };
 
 const std::vector<int64_t> numGroups = {
@@ -39,12 +43,25 @@ const std::vector<double> epsilon = {
 };
 
 INSTANTIATE_TEST_SUITE_P(
-    smoke_GroupNormalization,
+    smoke_GroupNormalizationStatic,
+    GroupNormalizationTest,
+    testing::Combine(testing::ValuesIn(netPrecisions),
+                     ::testing::Values(ov::element::undefined),
+                     ::testing::Values(ov::element::undefined),
+                     testing::ValuesIn(ov::test::static_shapes_to_test_representation(staticInputShapes)),
+                     testing::ValuesIn(numGroups),
+                     testing::ValuesIn(epsilon),
+                     testing::Values(ov::test::utils::DEVICE_CPU),
+                     testing::Values(ov::AnyMap())),
+    GroupNormalizationTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(
+    smoke_GroupNormalizationDyn,
     GroupNormalizationTest,
     testing::Combine(testing::ValuesIn(netPrecisions),
                      ::testing::Values(ov::element::undefined),
                      ::testing::Values(ov::element::undefined),
-                     testing::ValuesIn(inputShapes),
+                     testing::ValuesIn(dynamicInputShapes),
                      testing::ValuesIn(numGroups),
                      testing::ValuesIn(epsilon),
                      testing::Values(ov::test::utils::DEVICE_CPU),
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index 8280bdfe251783..cbdee358936444 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -292,6 +292,8 @@ std::vector<std::string> disabledTestPatterns() {
         retVector.emplace_back(R"(.*Extension.OnnxModelWithExtensionFromDSO.*)");
         retVector.emplace_back(R"(.*ONNXQuantizedModels/QuantizedModelsTests.MaxPool.*)");
         retVector.emplace_back(R"(.*ONNXQuantizedModels/QuantizedModelsTests.Convolution.*)");
+        // Ticket: 134601
+        retVector.emplace_back(R"(.*smoke_GroupNormalization.*)");
     }
     // invalid test: checks u8 precision for runtime graph, while it should be f32
     retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp
index 233ed080bf28e0..612c53db90ab39 100644
--- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp
+++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp
@@ -81,7 +81,7 @@ class GroupNormalizationTest : public testing::WithParamInterface<GroupNormalizationTestParams>,
                        [](const ov::Shape& s) -> ov::Shape { return {s[1]}; });
         InputShape biasInputShape {
-            ov::PartialShape{shape.first[1]},
+            shape.first.is_dynamic() ? ov::PartialShape{shape.first[1]} : shape.first,
             std::move(biasShape)
         };
         return biasInputShape;