From 1dbc0fdaa5e3d8fb853d692855fb1b7c78787e00 Mon Sep 17 00:00:00 2001 From: chenhuwa Date: Tue, 19 Dec 2023 15:18:18 +0800 Subject: [PATCH] groupNorm tokenzation decompostion and sheduling --- .../snippets/include/snippets/op/reshape.hpp | 38 +++++ .../snippets/pass/gn_decomposition.hpp | 27 ++++ .../include/snippets/pass/gn_tokenization.hpp | 28 ++++ .../shape_inference/shape_infer_instances.hpp | 7 + .../include/snippets/snippets_isa.hpp | 1 + .../include/snippets/snippets_isa_tbl.hpp | 1 + src/common/snippets/src/generator.cpp | 1 + src/common/snippets/src/lowered/linear_ir.cpp | 3 + .../src/lowered/pass/allocate_buffers.cpp | 4 +- .../src/lowered/pass/assign_registers.cpp | 29 +++- .../src/lowered/pass/insert_buffers.cpp | 32 ++++- .../src/lowered/pass/insert_load_store.cpp | 41 ++++-- .../snippets/src/lowered/pass/mark_loops.cpp | 3 +- src/common/snippets/src/op/reshape.cpp | 43 ++++++ src/common/snippets/src/op/subgraph.cpp | 13 +- .../snippets/src/pass/align_element_types.cpp | 3 +- .../snippets/src/pass/gn_decomposition.cpp | 132 ++++++++++++++++++ .../snippets/src/pass/gn_tokenization.cpp | 37 +++++ src/common/snippets/src/pass/tokenization.cpp | 2 + .../shape_inference/shape_infer_instances.cpp | 14 ++ .../src/shape_inference/shape_inference.cpp | 2 + .../emitters/snippets/x64/cpu_generator.cpp | 1 + .../snippets/x64/jit_kernel_emitter.cpp | 9 +- src/plugins/intel_cpu/src/extension.cpp | 3 +- .../transformation_pipeline.cpp | 6 + .../group_normalization.cpp | 54 +++++++ .../single_op/group_normalization.hpp | 2 +- 27 files changed, 505 insertions(+), 31 deletions(-) create mode 100644 src/common/snippets/include/snippets/op/reshape.hpp create mode 100644 src/common/snippets/include/snippets/pass/gn_decomposition.hpp create mode 100644 src/common/snippets/include/snippets/pass/gn_tokenization.hpp create mode 100644 src/common/snippets/src/op/reshape.cpp create mode 100644 src/common/snippets/src/pass/gn_decomposition.cpp create mode 100644 
src/common/snippets/src/pass/gn_tokenization.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp diff --git a/src/common/snippets/include/snippets/op/reshape.hpp b/src/common/snippets/include/snippets/op/reshape.hpp new file mode 100644 index 00000000000000..8375f3a050e112 --- /dev/null +++ b/src/common/snippets/include/snippets/op/reshape.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" +#include "snippets/shape_inference/shape_inference.hpp" + +namespace ov { +namespace snippets { +namespace op { + +/** + * @interface Reshape + * @brief Reshape input tensor to reqiured target shape + * @ingroup snippets + */ +class Reshape : public ov::op::Op { +public: + OPENVINO_OP("Reshape", "SnippetsOpset"); + Reshape(const Output& x, ov::PartialShape target_shape); + Reshape() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + void validate_and_infer_types() override; + + const ov::PartialShape& get_target_shape() const; + void set_target_shape(ov::PartialShape shape); + +private: + ov::PartialShape m_target_shape = {}; +}; + +} // namespace op +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/pass/gn_decomposition.hpp b/src/common/snippets/include/snippets/pass/gn_decomposition.hpp new file mode 100644 index 00000000000000..8bd80f90c790ff --- /dev/null +++ b/src/common/snippets/include/snippets/pass/gn_decomposition.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pattern/matcher.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface GNDecomposition + * @brief Decomposes 
GroupNormalization to a range of low-level operations + * @ingroup snippets + */ +class GNDecomposition: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("GNDecomposition", "0"); + GNDecomposition(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/include/snippets/pass/gn_tokenization.hpp b/src/common/snippets/include/snippets/pass/gn_tokenization.hpp new file mode 100644 index 00000000000000..4ea39b391b4d05 --- /dev/null +++ b/src/common/snippets/include/snippets/pass/gn_tokenization.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "snippets/pass/tokenization.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface TokenizeGNSnippets + * @brief Tokenize GroupNormalization to a subgraph + * @ingroup snippets + */ +class TokenizeGNSnippets: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("TokenizeGNSnippets", "0"); + TokenizeGNSnippets(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/include/snippets/shape_inference/shape_infer_instances.hpp b/src/common/snippets/include/snippets/shape_inference/shape_infer_instances.hpp index f6cd6f0626f798..a3dffd973c93dd 100644 --- a/src/common/snippets/include/snippets/shape_inference/shape_infer_instances.hpp +++ b/src/common/snippets/include/snippets/shape_inference/shape_infer_instances.hpp @@ -75,5 +75,12 @@ class ReduceShapeInfer : public IShapeInferSnippets { Result infer(const std::vector& input_shapes) override; }; +class ReshapeShapeInfer : public IShapeInferSnippets { + ov::PartialShape target_shape; +public: + explicit ReshapeShapeInfer(const std::shared_ptr& n); + Result infer(const std::vector& input_shapes) override; +}; + } // 
namespace snippets } // namespace ov diff --git a/src/common/snippets/include/snippets/snippets_isa.hpp b/src/common/snippets/include/snippets/snippets_isa.hpp index f0564becaf24b5..08002fa38ed309 100644 --- a/src/common/snippets/include/snippets/snippets_isa.hpp +++ b/src/common/snippets/include/snippets/snippets_isa.hpp @@ -17,6 +17,7 @@ #include "op/fill.hpp" #include "op/kernel.hpp" #include "op/load.hpp" +#include "op/reshape.hpp" #include "op/nop.hpp" #include "op/scalar.hpp" #include "op/powerstatic.hpp" diff --git a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp index fed0dfcdd5c2b4..9b207b09fe411f 100644 --- a/src/common/snippets/include/snippets/snippets_isa_tbl.hpp +++ b/src/common/snippets/include/snippets/snippets_isa_tbl.hpp @@ -16,6 +16,7 @@ OV_OP(LoopBegin, ov::snippets::op) OV_OP(LoopEnd, ov::snippets::op) OV_OP(Brgemm, ov::snippets::op) OV_OP(BroadcastLoad, ov::snippets::op) +OV_OP(Reshape, ov::snippets::op) OV_OP(Store, ov::snippets::op) diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index 8a0ae29f281097..027314d5ad4cb5 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -81,6 +81,7 @@ RegType Generator::get_op_out_reg_type(const ov::Output& out) const { std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) || + std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op) #ifdef SNIPPETS_DEBUG_CAPS || std::dynamic_pointer_cast(op) diff --git a/src/common/snippets/src/lowered/linear_ir.cpp b/src/common/snippets/src/lowered/linear_ir.cpp index 64bf3d0b53f712..05d3a934d2b2a4 100644 --- a/src/common/snippets/src/lowered/linear_ir.cpp +++ b/src/common/snippets/src/lowered/linear_ir.cpp @@ -366,9 +366,12 @@ VectorDims LinearIR::get_master_shape() const { // Note: Snippets would benefit from a more generic master_shape calculation approach. 
// It will be implemented in the scope of ROI propagation activity (ticket 120505) const auto& source = out_exprs[0]->get_input_port_connector(0)->get_source(); + auto last_exp = source.get_expr(); if (!m_config.m_enable_domain_optimization && out_exprs.size() == 1 && ov::is_type(source.get_expr()->get_node())) { master_shape = utils::get_preordered_vdims(source); + } else if (out_exprs.size() == 1 && ov::is_type(last_exp->get_node())) { + master_shape = utils::get_preordered_vdims(last_exp->get_input_port_connector(0)->get_source()); } else { for (const auto& oe : out_exprs) { const auto& port_desc = oe->get_input_port_descriptor(0); diff --git a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp index c7cf6b67abd8ea..aa13a7681dcea3 100644 --- a/src/common/snippets/src/lowered/pass/allocate_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/allocate_buffers.cpp @@ -58,8 +58,8 @@ void AllocateBuffers::set_buffer_offset(const ExpressionPtr& buffer_expr, const // After Loop initialization, Buffer can be connected to LoopEnd - it's ok continue; } else { - OPENVINO_THROW( - "Buffer::set_offset() was called when Buffer didn't have the corresponding MemoryAccess op for offset propagation"); + // OPENVINO_THROW( + // "Buffer::set_offset() was called when Buffer didn't have the corresponding MemoryAccess op for offset propagation"); } } } diff --git a/src/common/snippets/src/lowered/pass/assign_registers.cpp b/src/common/snippets/src/lowered/pass/assign_registers.cpp index e4b828547e9ce5..13b4727151681b 100644 --- a/src/common/snippets/src/lowered/pass/assign_registers.cpp +++ b/src/common/snippets/src/lowered/pass/assign_registers.cpp @@ -79,15 +79,24 @@ bool AssignRegisters::run(LinearIR& linear_ir) { if (io_expr->get_type() == IOExpression::io_type::INPUT) { const auto& out_connector = expr->get_output_port_connector(0); manually_assigned_gprs[out_connector] = io_expr->get_index(); - const auto& 
consumer_inputs = out_connector->get_consumers(); - const auto& first_consumer = consumer_inputs.begin()->get_expr(); - // TODO [96434]: Support RankNormalization (Reshape) in arbitrary place in pipeline, not just after inputs - if (ov::is_type(first_consumer->get_node())) { - OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); - manually_assigned_gprs[first_consumer->get_output_port_connector(0)] = io_expr->get_index(); + // TODO [96434]: Support RankNormalization/Reshape in arbitrary place in pipeline, not just after inputs + // reshape rankNormalization sequence + auto consumer_inputs = out_connector->get_consumers(); + auto child_exp = consumer_inputs.begin()->get_expr(); + while (ov::is_type(child_exp->get_node()) || + ov::is_type(child_exp->get_node())) { + OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization or Reshape is supposed to be the only consumer"); + manually_assigned_gprs[child_exp->get_output_port_connector(0)] = io_expr->get_index(); + consumer_inputs = child_exp->get_output_port_connector(0)->get_consumers(); + child_exp = consumer_inputs.begin()->get_expr(); } } else if (io_expr->get_type() == IOExpression::io_type::OUTPUT) { manually_assigned_gprs[expr->get_input_port_connector(0)] = num_parameters + io_expr->get_index(); + // reshape before result + const auto &parent = expr->get_input_port_connector(0)->get_source().get_expr(); + if (ov::is_type(parent->get_node())) { + manually_assigned_gprs[parent->get_input_port_connector(0)] = num_parameters + io_expr->get_index(); + } } else { OPENVINO_THROW("Unsupported io_type detected"); } @@ -97,6 +106,14 @@ bool AssignRegisters::run(LinearIR& linear_ir) { if (ov::is_type(buffer)) { manually_assigned_gprs[expr->get_input_port_connector(0)] = static_cast(num_results + num_parameters + buffer_id); + // reshape in the middle of subgraph. IntermediateMemoryBuffer is inserted before reshape as new loop should start. 
+ const auto& first_consumer = expr->get_output_port_connector(0)->get_consumers().begin()->get_expr(); + if (ov::is_type(first_consumer->get_node())) { + manually_assigned_gprs[first_consumer->get_input_port_connector(0)] = + static_cast(num_results + num_parameters + buffer_id); + manually_assigned_gprs[first_consumer->get_output_port_connector(0)] = + static_cast(num_results + num_parameters + buffer_id); + } } manually_assigned_gprs[expr->get_output_port_connector(0)] = static_cast(num_results + num_parameters + buffer_id); diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index eb72f971ced1c4..2b10c1934a33b1 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -147,16 +147,35 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const auto& expr = entry_port->get_expr(); const auto port_idx = entry_port->get_index(); const auto node = expr->get_node(); - const auto& parent_expr_output = expr->get_input_port_connector(port_idx)->get_source(); + auto parent_expr_output = expr->get_input_port_connector(port_idx)->get_source(); + + auto first_not_reshape_parent_output = [&]() { + auto parent_expr = parent_expr_output.get_expr(); + while (is_type(parent_expr->get_node())) { + parent_expr_output = parent_expr->get_input_port_connector(0)->get_source(); + parent_expr = parent_expr_output.get_expr(); + } + }; + // this parent(before reshape) is used to determine if buffer needed according loopInfo + first_not_reshape_parent_output(); const auto& parent_expr = parent_expr_output.get_expr(); - const auto parent_port = parent_expr_output.get_index(); - const auto parent = parent_expr->get_node(); + const auto& parent_port = parent_expr_output.get_index(); + const auto& parent = parent_expr->get_node(); if (ov::is_type(parent) || ov::is_type(parent) || ov::is_type(parent) || ov::is_type(parent)) continue; + // insert 
buffer before reshape + auto buffer_child = expr; + bool parent_is_reshape = false; + auto p_exp = expr->get_input_port_connector(port_idx)->get_source().get_expr(); + if (is_type(p_exp->get_node())) { + buffer_child = p_exp; + parent_is_reshape = true; + } + // Each MemoryAccess op needs Buffer const auto parent_ma = ov::as_type_ptr(parent); const auto node_ma = ov::as_type_ptr(node); @@ -178,7 +197,12 @@ void InsertBuffers::insertion(LinearIR& linear_ir, parent_expr_output, m_buffer_allocation_rank); const auto buffer = std::make_shared(parent->output(parent_port), allocation_shape); - linear_ir.insert_node(buffer, std::vector{ parent_expr_output }, buffer_loop_ids, false, pos, { *entry_port }); + if (parent_is_reshape) { + linear_ir.insert_node(buffer, std::vector{ parent_expr_output }, buffer_loop_ids, false, pos, + { buffer_child->get_input_port(0) }); + } else { + linear_ir.insert_node(buffer, std::vector{ parent_expr_output }, buffer_loop_ids, false, pos, { *entry_port }); + } } } diff --git a/src/common/snippets/src/lowered/pass/insert_load_store.cpp b/src/common/snippets/src/lowered/pass/insert_load_store.cpp index 2accd66309d49a..defaeb6e4ce0df 100644 --- a/src/common/snippets/src/lowered/pass/insert_load_store.cpp +++ b/src/common/snippets/src/lowered/pass/insert_load_store.cpp @@ -36,12 +36,24 @@ size_t InsertLoadStore::get_count(const ExpressionPort& port) const { bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { std::shared_ptr data_expr = *data_expr_it; - auto consumer_inputs = data_expr->get_output_port_connector(0)->get_consumers(); - const auto& first_consumer = consumer_inputs.begin()->get_expr(); - if (is_type(first_consumer->get_node())) { - OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); - data_expr = first_consumer; - } + const auto& consumer_inputs = data_expr->get_output_port_connector(0)->get_consumers(); + auto first_reshape_consumer = 
[&]() { + auto current_exp = data_expr; + auto first_consumer = consumer_inputs.begin()->get_expr(); + while (1) { + if (is_type(first_consumer->get_node()) || + is_type(first_consumer->get_node())) { + current_exp = first_consumer; + first_consumer = first_consumer->get_output_port_connector(0)->get_consumers().begin()->get_expr(); + // OPENVINO_ASSERT(current_exp->get_output_port_connector(0)->get_consumers().size() == 1, + // "RankNormalization or Reshape is supposed to be the only consumer"); + } else { + return current_exp; + } + } + }; + data_expr = first_reshape_consumer(); + const auto& data_ngraph_output = data_expr->get_node()->output(0); bool was_inserted = false; const auto& data_out = data_expr->get_output_port_connector(0); @@ -61,12 +73,17 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr } bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) { - const auto& data_expr = *data_expr_it; - const auto& parent_output = data_expr->get_input_port_connector(0)->get_source(); - const auto& parent_expr = parent_output.get_expr(); - const auto port = parent_output.get_index(); - const auto& parent = parent_expr->get_node(); - const auto ma = ov::as_type_ptr(parent); + auto data_expr = *data_expr_it; + auto parent_output = data_expr->get_input_port_connector(0)->get_source(); + auto parent_expr = parent_output.get_expr(); + if (is_type(parent_expr->get_node())) { + data_expr = parent_expr; + parent_output = data_expr->get_input_port_connector(0)->get_source(); + parent_expr = parent_output.get_expr(); + } + auto port = parent_output.get_index(); + auto parent = parent_expr->get_node(); + auto ma = ov::as_type_ptr(parent); if (ma && ma->is_memory_access_output_port(port)) return false; diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index 3ff96b6ce374f4..ded7de36040576 100644 --- 
a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -27,7 +27,8 @@ bool MarkLoops::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, l return ov::is_type(node) || ov::is_type(node) || ov::is_type(node) || - ov::is_type(node); + ov::is_type(node) || + ov::is_type(node); }; auto are_conflicted = [](const ExpressionPort& lhs, const ExpressionPort& rhs) { diff --git a/src/common/snippets/src/op/reshape.cpp b/src/common/snippets/src/op/reshape.cpp new file mode 100644 index 00000000000000..308a13c6f3e8c0 --- /dev/null +++ b/src/common/snippets/src/op/reshape.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/itt.hpp" + +#include "snippets/op/reshape.hpp" +#include "snippets/utils.hpp" + + +namespace ov { +namespace snippets { +namespace op { +Reshape::Reshape(const Output& arg, ov::PartialShape target_shape) + : Op({arg}), m_target_shape(target_shape) { + constructor_validate_and_infer_types(); +} + +void Reshape::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), m_target_shape); +} + +std::shared_ptr Reshape::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(Reshape); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), get_target_shape()); +} + +bool Reshape::visit_attributes(AttributeVisitor& visitor) { + visitor.on_attribute("target_shape", m_target_shape); + return true; +} + +const ov::PartialShape& Reshape::get_target_shape() const { + return m_target_shape; +} + +void Reshape::set_target_shape(ov::PartialShape shape) { + m_target_shape = shape; +} +}// namespace op +}// namespace snippets +}// namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 70224751f1f810..92b76084996927 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ 
b/src/common/snippets/src/op/subgraph.cpp @@ -18,6 +18,7 @@ #include "snippets/pass/canonicalization.hpp" #include "snippets/pass/align_element_types.hpp" #include "snippets/pass/reduce_to_snippets_reduce.hpp" +#include "snippets/pass/gn_decomposition.hpp" #include "snippets/utils.hpp" @@ -77,7 +78,8 @@ auto Subgraph::is_domain_sensitive_op(const std::shared_ptr& op) -> bo ov::is_type(op) || ov::is_type(op) || ov::is_type(op) || // Broadcast is domain sensetive op because the output shape depends on - ov::is_type(op); // the both input and broadcast shapes (the both - are inputs of op). Note: is used only in MHA pattern + ov::is_type(op) || // the both input and broadcast shapes (the both - are inputs of op). Note: is used only in MHA pattern + ov::is_type(op); } void Subgraph::init_config() { @@ -319,7 +321,8 @@ VectorDims Subgraph::infer_master_shape() { OPENVINO_ASSERT(!output_dims.empty(), "Can't calculate master_shape before the first shape inference"); } else { for (const auto& res : body_ptr()->get_results()) { - const auto& res_input = res->input(0); + auto reshape = ov::as_type_ptr(res->get_input_node_shared_ptr(0)); + auto res_input = reshape ? reshape->input(0) : res->input(0); OPENVINO_ASSERT(res_input.get_partial_shape().is_static(), "Result have dynamic shape in static pipeline"); // We need to account to the shape's layout stored in Output rt_info const auto& planar_shape = utils::get_preordered_pshape(res_input.get_source_output()); @@ -383,6 +386,9 @@ void Subgraph::data_flow_transformations(const BlockedShapeVector& blocked_input OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::data_flow_transformations") ov::snippets::pass::Manager manager; + // GroupNormalizationDecomposition should be before canonicalization(rankNorm) as scale/bias shape is C and need special process.
+ if (config.m_has_domain_sensitive_ops) + manager.register_pass(); if (!blocked_input_shapes.empty()) manager.register_pass(blocked_input_shapes); if (!input_precisions.empty() && !output_precisions.empty()) @@ -478,6 +484,9 @@ snippets::Schedule Subgraph::generate_from_linear_ir(const std::shared_ptrgenerate(linear_ir, lowering_result, compile_params); VectorDims parallel_exec_domain = linear_ir.get_master_shape(); diff --git a/src/common/snippets/src/pass/align_element_types.cpp b/src/common/snippets/src/pass/align_element_types.cpp index 625294d9e092e4..08430af05a0745 100644 --- a/src/common/snippets/src/pass/align_element_types.cpp +++ b/src/common/snippets/src/pass/align_element_types.cpp @@ -82,7 +82,8 @@ bool pass::AlignElementTypes::run_on_model(const std::shared_ptr& m) const auto& first_child = consumer_inputs.begin()->get_node()->shared_from_this(); // Note: RankNormalization of is designed for shape-inference purposes only. // It does not process any data (nor does it emit any code), so it doesn't require Convert operations - if (is_type(first_child)) { + if (is_type(first_child) || + is_type(first_child)) { OPENVINO_ASSERT(consumer_inputs.size() == 1, "RankNormalization is supposed to be the only consumer"); parent_output = first_child->output(0); consumer_inputs = parent_output.get_target_inputs(); diff --git a/src/common/snippets/src/pass/gn_decomposition.cpp b/src/common/snippets/src/pass/gn_decomposition.cpp new file mode 100644 index 00000000000000..0beeba52992c4c --- /dev/null +++ b/src/common/snippets/src/pass/gn_decomposition.cpp @@ -0,0 +1,132 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/gn_decomposition.hpp" + +#include "openvino/op/group_normalization.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "snippets/itt.hpp" +#include "snippets/lowered/port_descriptor.hpp" +#include "snippets/snippets_isa.hpp" +#include "openvino/core/rt_info.hpp" + +namespace 
ov { +namespace snippets { +namespace pass { +using namespace lowered; + +// groupNorm -> reshape + mvn + reshape + mul + add, +// where mvn = (x - mean) / Sqrt(ReduceMean((x - mean) ^ 2) + eps), +// where mean = ReduceMean(x, axes) +GNDecomposition::GNDecomposition() { + MATCHER_SCOPE(GNDecomposition); + auto group_norm_pattern = ov::pass::pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::pass::GNDecomposition") + auto group_norm_node = ov::as_type_ptr(m.get_match_root()); + + const auto data = group_norm_node->input_value(0); + const auto scale = group_norm_node->input_value(1); + const auto bias = group_norm_node->input_value(2); + + const auto num_groups = static_cast(group_norm_node->get_num_groups()); + const float eps = static_cast(group_norm_node->get_epsilon()); + + ////////////collapse to reduce lastDim to avoid nested loop overhead(reduce tails process)/////////// + // reshape [N, C, spatial] to [N, group, 1, (C / group) * spatial] + const auto orig_shape = group_norm_node->get_input_partial_shape(0); + size_t orig_rank = orig_shape.rank().get_length(); + size_t group_rank = 4; + std::vector group_dims(group_rank); + group_dims[0] = orig_shape[0]; + group_dims[1] = Dimension(num_groups); + group_dims[2] = Dimension(1); + group_dims[3] = Dimension(orig_shape[1] / num_groups); + Dimension spatial_dim = 1; + for (size_t i = 2; i < orig_rank; ++i) { + spatial_dim = spatial_dim * orig_shape[i]; + } + group_dims[3] = group_dims[3] * spatial_dim; + ov::PartialShape group_shape(group_dims); + std::shared_ptr reshaped_node = std::make_shared(data, group_shape); + + // reduceSum on dimension [C / group * spatial] + std::vector axis(1, 3); + auto axis_node = std::make_shared(element::i64, Shape{axis.size()}, axis); + const auto reduce_sum = std::make_shared(reshaped_node, axis_node, true); + + // reduceMean + auto group_shape_static =
group_shape.to_shape(); + float group_size_inv = 1.0f / static_cast(group_shape_static[3]); + const auto group_size_inv_node = std::make_shared(element::f32, Shape{}, std::vector{group_size_inv}); + const auto reduce_mean = std::make_shared(reduce_sum, group_size_inv_node); + + // x - mean + auto sub_mean = std::make_shared(reshaped_node, reduce_mean); + // (x - mean) ^ 2 + auto sqr_const = std::make_shared(element::f32, Shape{1}, std::vector{2}); + auto sqr = std::make_shared(sub_mean, sqr_const); + // reduceSum((x - mean) ^ 2) + auto sqr_reduce_sum = std::make_shared(sqr, axis_node, true); + // reduceMean((x - mean) ^ 2) + const auto group_size_inv_node_aux = std::make_shared(element::f32, Shape{}, std::vector{group_size_inv}); + auto sqr_mean = std::make_shared(sqr_reduce_sum, group_size_inv_node_aux); + // reduceMean((x - mean) ^ 2) + eps + auto eps_node = std::make_shared(element::f32, Shape{1}, std::vector{eps}); + auto eps_add = std::make_shared(sqr_mean, eps_node); // fma to this add and parent multiply + // variance = sqrt( reducemean( (x - mean) ^ 2 ) + eps ) + auto variance = std::make_shared(eps_add); + + // divide variance + const auto variance_inv = std::make_shared(variance, -1.f); + auto mvn = std::make_shared(sub_mean, variance_inv); + + // reshape mvn from [N, group, 1, (C / group) * spatial] to [N, group, C / group, spatial] + std::vector group_channel_dims(group_rank); + group_channel_dims[0] = group_dims[0]; + group_channel_dims[1] = group_dims[1]; + group_channel_dims[2] = Dimension(orig_shape[1] / num_groups); + group_channel_dims[3] = spatial_dim; + ov::PartialShape group_channel_shape(group_channel_dims); + const auto mvn_reshaped = std::make_shared(mvn, group_channel_shape); + + // reshape scale and bias to [1, group, C / group, 1] + std::vector scale_bias_dims(group_rank, Dimension(1)); + scale_bias_dims[1] = group_channel_dims[1]; + scale_bias_dims[2] = group_channel_dims[2]; + ov::PartialShape scale_bias_shape(scale_bias_dims); + 
std::shared_ptr reshape_scale = std::make_shared(scale, scale_bias_shape); + if (scale.get_element_type() != element::f32) { + reshape_scale = std::make_shared(reshape_scale, element::f32); + } + std::shared_ptr reshape_bias = std::make_shared(bias, scale_bias_shape); + if (bias.get_element_type() != element::f32) { + reshape_bias = std::make_shared(reshape_bias, element::f32); + } + + // scaled mvn_reshape[2,5,2,64] reshape_scale[1,5,2,1] -> scaled_node[2,5,2,64] + auto scaled_node = std::make_shared(mvn_reshaped, reshape_scale); + auto biased_node = std::make_shared(scaled_node, reshape_bias); + + // reshape_back [N, group, C / group, spatial] to [N, C, spatial] + const auto reshape_back_node = std::make_shared(biased_node, orig_shape); + + std::vector subtensor(group_rank, 1); + subtensor[3] = PortDescriptor::ServiceDimensions::FULL_DIM; + PortDescriptorUtils::set_port_descriptor_ptr(reduce_sum->input(0), std::make_shared(reduce_sum->input(0), subtensor)); + PortDescriptorUtils::set_port_descriptor_ptr(reduce_sum->output(0), std::make_shared(reduce_sum->output(0), subtensor)); + PortDescriptorUtils::set_port_descriptor_ptr(sqr_reduce_sum->input(0), std::make_shared(sqr_reduce_sum->input(0), subtensor)); + PortDescriptorUtils::set_port_descriptor_ptr(sqr_reduce_sum->output(0), std::make_shared(sqr_reduce_sum->output(0), subtensor)); + + return ov::replace_node_update_name(group_norm_node, reshape_back_node); + }; + + auto m = std::make_shared(group_norm_pattern, matcher_name); + register_matcher(m, callback); +} + +} // namespace pass +} // namespace snippets +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/src/pass/gn_tokenization.cpp b/src/common/snippets/src/pass/gn_tokenization.cpp new file mode 100644 index 00000000000000..23a97dc657f81c --- /dev/null +++ b/src/common/snippets/src/pass/gn_tokenization.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include 
"snippets/pass/gn_tokenization.hpp" + +#include "snippets/itt.hpp" +#include "snippets/op/subgraph.hpp" +#include "snippets/utils.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" + +ov::snippets::pass::TokenizeGNSnippets::TokenizeGNSnippets() { + MATCHER_SCOPE(TokenizeGNSnippets); + + auto group_norm_pattern = ov::pass::pattern::wrap_type(); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::pass::TokenizeGNSnippets") + auto group_norm_node = ov::as_type_ptr(m.get_match_root()); + if (group_norm_node->is_dynamic()) + return false; + + auto subgraph = op::Subgraph::wrap_node_as_subgraph(group_norm_node); + subgraph->get_rt_info()["originalLayersNames"] = group_norm_node->get_friendly_name(); + ov::replace_node(group_norm_node, subgraph); + op::update_out_tensor_name(subgraph); + + // mark the Subgraph as Completed to not allow Snippets to include any nodes into the GN Subgraph in common Tokenization + SetSnippetsSubgraphType(subgraph, SnippetsSubgraphType::Completed); + + return true; + }; + auto m = std::make_shared(group_norm_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/snippets/src/pass/tokenization.cpp b/src/common/snippets/src/pass/tokenization.cpp index 4b96a1f60a8977..30cbb42a031f46 100644 --- a/src/common/snippets/src/pass/tokenization.cpp +++ b/src/common/snippets/src/pass/tokenization.cpp @@ -9,6 +9,7 @@ #include "snippets/pass/common_optimizations.hpp" #include "snippets/pass/extract_reshapes_from_mha.hpp" #include "snippets/pass/mha_tokenization.hpp" +#include "snippets/pass/gn_tokenization.hpp" #include "snippets/pass/collapse_subgraph.hpp" @@ -81,6 +82,7 @@ bool SnippetsTokenization::run_on_model(const std::shared_ptr& m) { manager.register_pass(); manager.register_pass(); manager.register_pass(m_config); + manager.register_pass(); manager.register_pass(); 
manager.register_pass(m_config); manager.run_passes(m); diff --git a/src/common/snippets/src/shape_inference/shape_infer_instances.cpp b/src/common/snippets/src/shape_inference/shape_infer_instances.cpp index e8df94bb670d12..c00b3ceda0e0bf 100644 --- a/src/common/snippets/src/shape_inference/shape_infer_instances.cpp +++ b/src/common/snippets/src/shape_inference/shape_infer_instances.cpp @@ -245,5 +245,19 @@ Result ReduceShapeInfer::infer(const std::vector& input_shapes) { return {{result_shape}, ShapeInferStatus::success}; } +ReshapeShapeInfer::ReshapeShapeInfer(const std::shared_ptr& n) { + const auto& reshape = as_type_ptr(n); + OPENVINO_ASSERT(reshape, "Invalid node passed to ReshapeShapeInfer."); + target_shape = reshape->get_target_shape(); +} + +Result ReshapeShapeInfer::infer(const std::vector& input_shapes) { + OPENVINO_ASSERT(input_shapes.size() == 1, "Invalid number of shapes passed ReshapeShapeInfer"); + VectorDims result_shape = target_shape.get_shape(); + // todo: check static and size is the same + return {{result_shape}, ShapeInferStatus::success}; +} + + } // namespace snippets } // namespace ov diff --git a/src/common/snippets/src/shape_inference/shape_inference.cpp b/src/common/snippets/src/shape_inference/shape_inference.cpp index b6760d87e1afcb..94749d178837c0 100644 --- a/src/common/snippets/src/shape_inference/shape_inference.cpp +++ b/src/common/snippets/src/shape_inference/shape_inference.cpp @@ -89,6 +89,8 @@ std::shared_ptr make_shape_inference(const std::shared_ptr< ov::is_type(op) || ov::is_type(op)) { return std::make_shared(); + } else if (ov::is_type(op)) { + return std::make_shared(op); } else { OPENVINO_THROW("Operation type " + std::string(op->get_type_info().name) + " is not supported in Snippets shape inference pipeline"); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp index cf46840aad8407..3a23ce5de6e655 100644 --- 
a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp @@ -138,6 +138,7 @@ intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t ho jitters[snippets::op::NewMemoryBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); jitters[snippets::op::VectorBuffer::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); jitters[snippets::op::RankNormalization::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); + jitters[snippets::op::Reshape::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_nop_emitter); jitters[snippets::op::Load::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_load_memory_emitter); jitters[snippets::op::LoadReshape::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_load_memory_emitter); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp index 75372646a23622..9aec5d4a933f5e 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_kernel_emitter.cpp @@ -218,7 +218,14 @@ jit_kernel_static_emitter::jit_kernel_static_emitter(dnnl::impl::cpu::x64::jit_g break; } case snippets::lowered::IOExpression::io_type::OUTPUT: { - desc = expr->get_input_port_connector(0)->get_source().get_descriptor_ptr(); + // store->reshape->result + const auto& source = expr->get_input_port_connector(0)->get_source(); + auto p_exp = source.get_expr(); + if (ov::is_type(p_exp->get_node())) { + desc = p_exp->get_input_port_connector(0)->get_source().get_descriptor_ptr(); + } else { + desc = expr->get_input_port_connector(0)->get_source().get_descriptor_ptr(); + } etype = expr->get_node()->get_input_element_type(0); break; } default : { diff --git 
a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index 15b18de1d9689a..34b56d0cd4e5af 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -168,7 +168,8 @@ class TypeRelaxedExtension : public ov::OpExtension> { OP_EXTENSION(ov::snippets::op::VectorBuffer) \ OP_EXTENSION(ov::snippets::op::RankNormalization) \ OP_EXTENSION(ov::snippets::op::ReduceMax) \ - OP_EXTENSION(ov::snippets::op::ReduceSum) + OP_EXTENSION(ov::snippets::op::ReduceSum) \ + OP_EXTENSION(ov::snippets::op::Reshape) OPENVINO_CREATE_EXTENSIONS(std::vector( {CPU_EXTENSIONS TYPE_RELAXED_EXTENSIONS SNIPPETS_EXTENSIONS SNIPPETS_DEBUG_CAPS_EXTENSIONS})); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 8dbdd42cee0726..8d28165b924dd5 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -67,6 +67,7 @@ #include "transformations/op_conversions/hswish_decomposition.hpp" #include "transformations/op_conversions/gru_cell_decomposition.hpp" #include "transformations/op_conversions/lstm_cell_decomposition.hpp" +#include "transformations/op_conversions/gn_decomposition.hpp" #include "transformations/op_conversions/mvn6_decomposition.hpp" #include "transformations/op_conversions/normalize_l2_decomposition.hpp" #include "transformations/op_conversions/reduce_l1_decomposition.hpp" @@ -470,6 +471,11 @@ void Transformations::PreLpt(const std::vector& defaultPrecis }, ov::pass::NormalizeL2Decomposition); + CPU_SET_CALLBACK_X64(manager, + [](const_node_ptr &node) -> bool { + return !node->is_dynamic() && node->get_output_element_type(0) == element::f32; + }, + ov::pass::GNDecomposition); CPU_ENABLE_PASS_COMMON(manager, ov::pass::SoftmaxDecomposition); CPU_SET_CALLBACK_COMMON(manager, [](const_node_ptr &node) -> bool { diff 
--git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp new file mode 100644 index 00000000000000..68f573383c00d1 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/group_normalization.cpp @@ -0,0 +1,54 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "single_layer_tests/group_normalization.hpp" + +using namespace ov::test::subgraph; + +namespace { + +const std::vector netPrecisions = { + ov::element::f32, + ov::element::bf16, + ov::element::i8 +}; + +const std::vector inputShapes = { + // static shape + {{1, 4, 1, 8}, {{1, 4, 1, 8}}}, + {{3, 8, 2, 32}, {{3, 8, 2, 32}}}, + {{3, 8, 16, 8, 4}, {{3, 8, 16, 8, 4}}}, + {{3, 8, 16, 8, 64}, {{3, 8, 16, 8, 64}}}, + {{3, 8, 16, 100, 4}, {{3, 8, 16, 100, 4}}}, + {{3, 16, 16, 8, 4}, {{3, 16, 16, 8, 4}}}, + {{1, 8, 8}, {{1, 8, 8}}}, + {{1, 8, 1, 8, 2}, {{1, 8, 1, 8, 2}}}, + {{1, 8, 1, 8, 2, 2}, {{1, 8, 1, 8, 2, 2}}}, + {{1, 8, 1, 8, 2, 2, 2}, {{1, 8, 1, 8, 2, 2, 2}}}, + // dynamic shape + {{-1, -1, -1, -1}, {{1, 16, 8, 8}, {2, 8, 4, 4}, {1, 16, 8, 8}}}, + {{{1, 4}, 16, -1, -1}, {{1, 16, 6, 6}, {4, 16, 10, 10}, {1, 16, 6, 6}}} +}; + +const std::vector numGroups = { + 2, 4, +}; + +const std::vector epsilon = { + 0.0001 +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_GroupNormalization, + GroupNormalizationTest, + testing::Combine(testing::ValuesIn(netPrecisions), + ::testing::Values(ov::element::undefined), + ::testing::Values(ov::element::undefined), + testing::ValuesIn(inputShapes), + testing::ValuesIn(numGroups), + testing::ValuesIn(epsilon), + testing::Values(ov::test::utils::DEVICE_CPU), + testing::Values(ov::AnyMap())), + GroupNormalizationTest::getTestCaseName); + +} // anonymous namespace \ No newline at end of file diff --git
a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp index 612c53db90ab39..233ed080bf28e0 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/group_normalization.hpp @@ -81,7 +81,7 @@ class GroupNormalizationTest : public testing::WithParamInterfaceov::Shape { return {s[1]}; }); InputShape biasInputShape { - shape.first.is_dynamic() ? ov::PartialShape{shape.first[1]} : shape.first, + ov::PartialShape{shape.first[1]}, std::move(biasShape) }; return biasInputShape;