From 9487cc34d4b8cd20c00e110753f9cc7155a28eb7 Mon Sep 17 00:00:00 2001
From: Evgeniia Nugmanova
Date: Tue, 10 Oct 2023 15:30:51 +0400
Subject: [PATCH] De-Reshape MatMul

---
 .../dereshape_matmul.hpp                      |  67 +++
 .../symbolic_optimizations.hpp                |   8 +
 .../symbolic_transformations/utils.hpp        |  10 +
 .../dereshape_matmul.cpp                      | 335 +++++++++++++++
 .../symbolic_optimizations.cpp                |  69 ++-
 .../symbolic_transformations/utils.cpp        |  17 +
 .../dereshape_matmul.cpp                      | 399 ++++++++++++++++++
 7 files changed, 904 insertions(+), 1 deletion(-)
 create mode 100644 src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp
 create mode 100644 src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp
 create mode 100644 src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp

diff --git a/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp b/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp
new file mode 100644
index 00000000000000..53922ce5df323a
--- /dev/null
+++ b/src/common/transformations/include/transformations/symbolic_transformations/dereshape_matmul.hpp
@@ -0,0 +1,67 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <openvino/pass/graph_rewrite.hpp>
+#include <openvino/pass/pattern/matcher.hpp>
+#include <transformations_visibility.hpp>
+
+namespace ov {
+namespace pass {
+class TRANSFORMATIONS_API DeReshapeMatMul;
+}  // namespace pass
+}  // namespace ov
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief Transformation uses symbol / label information to optimize out Reshape operations surrounding MatMul.
+ * It checks that the surrounding Reshapes only manipulate the batch dimensions of the tensor in a do/undo fashion.
+ *
+ * Example:
+ *    Before:
+ *     [A,B,C,D] -> Reshape -> [A*B,C,D]
+ *                                        MatMul [A*B,C,E] -> Reshape -> [A,B,C,E]
+ *     [A,B,D,E] -> Reshape -> [A*B,D,E]
+ *
+ *    After:
+ *     [A,B,C,D]  ->
+ *                    MatMul -> [A,B,C,E]
+ *     [A,B,D,E]  ->
+ *
+ * The transformation allows slightly different variations of the pattern on the MatMul inputs.
+ *  - The simplest pattern contains only a Reshape operation on a MatMul input:
+ *       Reshape -> MatMul
+ *
+ *  - The next acceptable variation is a Concat of two inputs on a MatMul input:
+ *       Reshape -[-> Concat -]-> MatMul
+ *    This variation is transformed by realigning the other input of the Concat and the other consumers of the
+ *    Concat output with the help of Reshape operations.
+ *
+ *  - The most complex variation on a MatMul input is a Binary Elementwise Arithmetic operation with a scalar second
+ *    input:
+ *       Reshape -[-> Concat -]-[-> BEA (scalar) -]-> MatMul
+ *
+ * Additionally, the transformation supports a variation of the pattern on the MatMul output: a Binary Elementwise
+ * Arithmetic operation without the scalar-second-input restriction.
+ *       MatMul -[-> BEA -]-> Reshape
+ * This variation is only applicable when the input reshapes are 4D -> 3D and the output reshape is 3D -> 4D.
+ * Additionally, the shape labels on the MatMul output must equal the input shape labels of the last Reshape, meaning
+ * that this Binary Elementwise Arithmetic doesn't broadcast the input coming from MatMul -- only the other input may
+ * be broadcast to the MatMul input of this BEA. This property (equality of MatMul output shape labels and BEA output
+ * shape labels) is established by the LabelResolvingThroughSelect transformation in the particular models that this
+ * variation targets.
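+ *
+ * Illustration of the Concat variation (a sketch mirroring pull_reshape_through_optional_concat_and_bea in the
+ * matching .cpp file; A..E and K are labeled dimensions, this is not an additional mode):
+ *    Before:
+ *     [A,B,C,D] -> Reshape -> [A*B,C,D] -> Concat (axis=-2, other input [A*B,K,D]) -> [A*B,C+K,D] -> MatMul
+ *    After:
+ *     [A,B,C,D] -> Concat (axis=-2, other input realigned: Reshape [A*B,K,D] -> [A,B,K,D]) -> [A,B,C+K,D] -> MatMul
+ *     while any other consumer of the original Concat receives a Reshape back to [A*B,C+K,D]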
+ *
+ * Full pattern this transformation searches for:
+ *      -> Reshape -[-> Concat -]-[-> BEA (scalar) -]->
+ *                                                       MatMul -[-> BEA -]-> Reshape ->
+ *      -> Reshape -[-> Concat -]-[-> BEA (scalar) -]->
+ *
+ * NOTE: the input branches could be (and in the observed models are) asymmetrical: the presence of a Concat on one
+ * input of the MatMul doesn't require the other input to also have a Concat
+ */
+class ov::pass::DeReshapeMatMul : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("DeReshapeMatMul", "0");
+    DeReshapeMatMul();
+};
diff --git a/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp b/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp
index 1cf3cf9577dc78..71e234cfeabd29 100644
--- a/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp
+++ b/src/common/transformations/include/transformations/symbolic_transformations/symbolic_optimizations.hpp
@@ -14,6 +14,7 @@ namespace ov {
 namespace pass {
 class TRANSFORMATIONS_API SymbolicOptimizations;
 class TRANSFORMATIONS_API SymbolicPropagation;
+class TRANSFORMATIONS_API LabelResolvingThroughSelect;
 }  // namespace pass
 }  // namespace ov
 
@@ -48,3 +49,10 @@ class ov::pass::SymbolicPropagation : public ov::pass::ModelPass {
 private:
     std::shared_ptr<ov::TableOfEquivalence> m_te;
 };
+
+// TODO: add description and order
+class ov::pass::LabelResolvingThroughSelect : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("LabelResolvingThroughSelect", "0");
+    LabelResolvingThroughSelect();
+};
\ No newline at end of file
diff --git a/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp b/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp
index 2f3d84dfe825ff..8d6e927e25a995 100644
--- a/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp
+++ b/src/common/transformations/include/transformations/symbolic_transformations/utils.hpp
@@ -38,6 +38,16 @@ TRANSFORMATIONS_API bool get_labels(const ov::Output<ov::Node>& output, ov::TensorLabel& labels);
 ///
 /// \return true if labels are unique and equal between lhs and rhs else false
 TRANSFORMATIONS_API bool are_unique_and_equal_labels(const ov::TensorLabel& lhs, const ov::TensorLabel& rhs);
+
+/// \brief Compares dimensions: if the dimensions are static, it compares their values; if the dimensions are
+/// dynamic, it compares their respective labels using the TableOfEquivalence
+///
+/// \param lhs Dimension object to compare
+/// \param rhs Dimension object to compare
+///
+/// \return true if static dimensions are equal or if dynamic dimensions have equal labels, else false
+TRANSFORMATIONS_API bool dims_are_equal(const ov::Dimension& lhs, const ov::Dimension& rhs);
+
 }  // namespace util
 }  // namespace symbol
 }  // namespace ov
diff --git a/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp b/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp
new file mode 100644
index 00000000000000..be866fbc7b1c4f
--- /dev/null
+++ b/src/common/transformations/src/transformations/symbolic_transformations/dereshape_matmul.cpp
@@ -0,0 +1,335 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/symbolic_transformations/dereshape_matmul.hpp"
+
+#include <openvino/op/concat.hpp>
+#include <openvino/op/constant.hpp>
+#include <openvino/op/divide.hpp>
+#include <openvino/op/matmul.hpp>
+#include <openvino/op/reshape.hpp>
+#include <openvino/op/util/binary_elementwise_arithmetic.hpp>
+#include <openvino/pass/pattern/op/or.hpp>
+#include <openvino/pass/pattern/op/wrap_type.hpp>
+
+#include "itt.hpp"
"openvino/core/validation_util.hpp" +#include "transformations/utils/utils.hpp" + +using namespace ov::symbol::util; + +namespace { +bool concat_predicate(ov::Output output) { + auto output_pshape = output.get_partial_shape(); + if (output_pshape.rank().is_dynamic() || output_pshape.size() <= 2) + return false; + const auto& concat = ov::as_type_ptr(output.get_node_shared_ptr()); + if (!concat) + return false; + return concat->get_concatenation_axis() >= output_pshape.rank().get_length() - 2; +} + +bool last_two_dims_are_equal(const ov::PartialShape& lhs, const ov::PartialShape& rhs) { + if (lhs.rank().is_dynamic() || lhs.size() < 2) + return false; + if (rhs.rank().is_dynamic() || rhs.size() < 2) + return false; + for (size_t i = 2; i > 0; --i) + if (!dims_are_equal(lhs[lhs.size() - i], rhs[rhs.size() - i])) + return false; + return true; +} + +bool reshape_keeps_last_two_dims(const std::shared_ptr& op) { + return last_two_dims_are_equal(op->get_input_partial_shape(0), op->get_output_partial_shape(0)); +} + +bool batches_are_equal(const ov::PartialShape& lhs, const ov::PartialShape& rhs, bool one_dim_can_differ = false) { + if (lhs.rank().is_dynamic() || rhs.rank().is_dynamic() || lhs.size() != rhs.size()) + return false; + size_t num_dims_differ = 0; + for (size_t i = 0; i < lhs.size() - 2; ++i) + num_dims_differ += !dims_are_equal(lhs[i], rhs[i]); + return num_dims_differ <= one_dim_can_differ; +} + +bool batches_are_equal(const std::shared_ptr& op_0, const std::shared_ptr& op_1) { + auto input_0 = op_0->get_input_partial_shape(0); + auto input_1 = op_1->get_input_partial_shape(0); + auto output_0 = op_0->get_output_partial_shape(0); + auto output_1 = op_1->get_output_partial_shape(0); + return batches_are_equal(input_0, input_1, true) && batches_are_equal(output_0, output_1); +} + +ov::Output get_shape_from_sources(const ov::Output& batch_dims_source, + const ov::Output& non_batch_dims_source, + const std::vector>& copy_rt_info_from) { + ov::NodeVector dims; + size_t num_batch_dims = batch_dims_source.get_partial_shape().size() - 2; + std::vector non_constant_ids; + for (size_t i = 0; i < num_batch_dims; ++i) { + auto node = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(batch_dims_source, + {i}, + copy_rt_info_from); + OPENVINO_SUPPRESS_DEPRECATED_START + if (auto constant = ov::get_constant_from_source(node)) { + OPENVINO_SUPPRESS_DEPRECATED_END + node = constant; + } else { + non_constant_ids.push_back(i); + } + dims.push_back(node); + } + if (non_constant_ids.size() == 1) { + dims[non_constant_ids[0]] = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1}); + } + + size_t non_batch_dims_start = non_batch_dims_source.get_partial_shape().size() - 2; + for (size_t i = non_batch_dims_start; i < non_batch_dims_start + 2; ++i) { + auto node = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(non_batch_dims_source, + {i}, + copy_rt_info_from); + OPENVINO_SUPPRESS_DEPRECATED_START + if (auto constant = ov::get_constant_from_source(node)) { + OPENVINO_SUPPRESS_DEPRECATED_END + node = constant; + } + dims.push_back(node); + } + + for (size_t curr_i = 1; curr_i < dims.size(); ++curr_i) { + const auto& curr_node = dims[curr_i]; + if (bool current_node_is_constant = ov::op::util::is_constant(curr_node)) { + size_t prev_i = curr_i - 1; + const auto& prev_node = dims[prev_i]; + if (bool previous_node_exists_and_is_constant = prev_node && ov::op::util::is_constant(prev_node)) { + dims[curr_i] = ov::op::util::make_try_fold(ov::NodeVector{prev_node, 
+ov::Output<ov::Node> get_shape_from_sources(const ov::Output<ov::Node>& batch_dims_source,
+                                            const ov::Output<ov::Node>& non_batch_dims_source,
+                                            const std::vector<std::shared_ptr<ov::Node>>& copy_rt_info_from) {
+    ov::NodeVector dims;
+    size_t num_batch_dims = batch_dims_source.get_partial_shape().size() - 2;
+    std::vector<size_t> non_constant_ids;
+    for (size_t i = 0; i < num_batch_dims; ++i) {
+        auto node = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(batch_dims_source,
+                                                                                       {i},
+                                                                                       copy_rt_info_from);
+        OPENVINO_SUPPRESS_DEPRECATED_START
+        if (auto constant = ov::get_constant_from_source(node)) {
+            OPENVINO_SUPPRESS_DEPRECATED_END
+            node = constant;
+        } else {
+            non_constant_ids.push_back(i);
+        }
+        dims.push_back(node);
+    }
+    // if exactly one batch dimension is non-constant, it can be represented as -1 in the target shape
+    if (non_constant_ids.size() == 1) {
+        dims[non_constant_ids[0]] = ov::op::v0::Constant::create(ov::element::i64, {1}, {-1});
+    }
+
+    size_t non_batch_dims_start = non_batch_dims_source.get_partial_shape().size() - 2;
+    for (size_t i = non_batch_dims_start; i < non_batch_dims_start + 2; ++i) {
+        auto node = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(non_batch_dims_source,
+                                                                                       {i},
+                                                                                       copy_rt_info_from);
+        OPENVINO_SUPPRESS_DEPRECATED_START
+        if (auto constant = ov::get_constant_from_source(node)) {
+            OPENVINO_SUPPRESS_DEPRECATED_END
+            node = constant;
+        }
+        dims.push_back(node);
+    }
+
+    // merge neighbouring constant dims into a single Constant to shorten the target-shape sub-graph
+    for (size_t curr_i = 1; curr_i < dims.size(); ++curr_i) {
+        const auto& curr_node = dims[curr_i];
+        if (bool current_node_is_constant = ov::op::util::is_constant(curr_node)) {
+            size_t prev_i = curr_i - 1;
+            const auto& prev_node = dims[prev_i];
+            if (bool previous_node_exists_and_is_constant = prev_node && ov::op::util::is_constant(prev_node)) {
+                dims[curr_i] =
+                    ov::op::util::make_try_fold<ov::op::v0::Concat>(ov::NodeVector{prev_node, curr_node}, 0);
+                dims[prev_i] = nullptr;
+            }
+        }
+    }
+    dims.erase(std::remove_if(dims.begin(),
+                              dims.end(),
+                              [](const std::shared_ptr<ov::Node>& node) {
+                                  return node == nullptr;
+                              }),
+               dims.end());
+    auto target_shape = ov::op::util::make_try_fold<ov::op::v0::Concat>(dims, 0);
+    ov::copy_runtime_info(copy_rt_info_from, target_shape);
+    return target_shape->output(0);
+}
+
+void pull_reshape_through_optional_concat_and_bea(const ov::pass::pattern::PatternValueMap& vm,
+                                                  std::shared_ptr<ov::Node> concat_label,
+                                                  std::shared_ptr<ov::Node> bea_label,
+                                                  ov::Output<ov::Node> reshape_output,
+                                                  ov::Input<ov::Node> matmul_input,
+                                                  std::vector<ov::Node*>& nodes_for_revalidation) {
+    // Reshape -- [Concat] -- [BEA with scalar] --> MatMul
+    auto original_reshape = reshape_output.get_node_shared_ptr();
+    if (vm.count(concat_label)) {
+        auto concat_node = ov::as_type_ptr<ov::op::v0::Concat>(vm.at(concat_label).get_node_shared_ptr());
+        OPENVINO_ASSERT(concat_node != nullptr,
+                        "DeReshapeMatMul transformation matched operation which should be Concat -- but it is not");
+        auto rank = concat_node->get_output_partial_shape(0).rank().get_length();
+        auto axis = (concat_node->get_concatenation_axis() == (rank - 1)) ? -1 : -2;
+
+        auto idx_of_reshape_input = reshape_output == concat_node->input_value(0) ? 0 : 1;
+        auto idx_of_non_reshape_input = static_cast<size_t>(!idx_of_reshape_input);
+
+        auto target_shape_of_input = get_shape_from_sources(original_reshape->input_value(0),
+                                                            concat_node->input_value(idx_of_non_reshape_input),
+                                                            {original_reshape});
+
+        auto input_reshape = original_reshape->clone_with_new_inputs(
+            {concat_node->input_value(idx_of_non_reshape_input), target_shape_of_input});
+        ov::copy_runtime_info(original_reshape, input_reshape);
+
+        ov::replace_output_update_name(reshape_output, original_reshape->input_value(0));
+
+        ov::OutputVector new_concat_inputs(2);
+        new_concat_inputs[idx_of_reshape_input] = concat_node->input_value(idx_of_reshape_input);
+        new_concat_inputs[idx_of_non_reshape_input] = input_reshape->output(0);
+
+        auto new_concat = std::make_shared<ov::op::v0::Concat>(new_concat_inputs, axis);
+        ov::copy_runtime_info({concat_node, original_reshape}, new_concat);
+
+        auto target_shape_of_output =
+            get_shape_from_sources(input_reshape->input_value(0), new_concat->output(0), {original_reshape});
+        auto output_reshape = original_reshape->clone_with_new_inputs({new_concat->output(0), target_shape_of_output});
+        ov::copy_runtime_info(original_reshape, output_reshape);
+
+        if (vm.count(bea_label)) {
+            auto bea_node = vm.at(bea_label).get_node_shared_ptr();
+            auto idx_of_non_scalar_data = bea_node->input_value(0) == vm.at(concat_label) ? 0 : 1;
+            bea_node->input(idx_of_non_scalar_data).replace_source_output(new_concat);
+            nodes_for_revalidation.insert(nodes_for_revalidation.begin(), bea_node.get());
+        } else {
+            matmul_input.replace_source_output(new_concat);
+        }
+        ov::replace_output_update_name(concat_node->output(0), output_reshape->output(0));
+    } else {
+        // no Concat and it doesn't matter if BEA is present -- just delete the Reshape
+        ov::replace_output_update_name(reshape_output, original_reshape->input_value(0));
+    }
+}
+}  // namespace
+
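+// Helper patterns shared by both MatMul inputs: IN_RESHAPE matches a single-consumer Reshape that keeps the last
+// two dimensions intact; SCALAR_INPUT matches any input with a static single-element shape.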
+#define IN_RESHAPE                                                                                                 \
+    pattern::wrap_type<ov::op::v1::Reshape>(pattern::op::as_value_predicate([](std::shared_ptr<Node> n) -> bool { \
+        return pattern::consumers_count(1)(n->output(0)) && reshape_keeps_last_two_dims(n);                        \
+    }));
+
+#define SCALAR_INPUT                                                                        \
+    pattern::any_input([](ov::Output<Node> out) {                                           \
+        return out.get_partial_shape().is_static() && ov::shape_size(out.get_shape()) == 1; \
+    });
+
+ov::pass::DeReshapeMatMul::DeReshapeMatMul() {
+    MATCHER_SCOPE(DeReshapeMatMul);
+    // BEGIN: symmetrical patterns for MatMul inputs
+
+    // lhs of MatMul
+    auto lhs_reshape = IN_RESHAPE;
+
+    auto lhs_concat_0 = pattern::wrap_type<ov::op::v0::Concat>({pattern::any_input(), lhs_reshape}, concat_predicate);
+    auto lhs_concat_1 = pattern::wrap_type<ov::op::v0::Concat>({lhs_reshape, pattern::any_input()}, concat_predicate);
+    auto lhs_concat = std::make_shared<pattern::op::Or>(OutputVector{lhs_concat_0, lhs_concat_1});
+
+    auto lhs_reshape_or_concat = std::make_shared<pattern::op::Or>(OutputVector{lhs_reshape, lhs_concat});
+
+    auto lhs_bea_scalar = SCALAR_INPUT;
+    auto lhs_bea = pattern::wrap_type<ov::op::util::BinaryElementwiseArithmetic>(
+        {lhs_reshape_or_concat, lhs_bea_scalar},
+        pattern::consumers_count(1));
+
+    auto lhs_bea_or_concat = std::make_shared<pattern::op::Or>(OutputVector{lhs_reshape_or_concat, lhs_bea});
+
+    // rhs of MatMul
+    auto rhs_reshape = IN_RESHAPE;
+
+    auto rhs_concat_0 = pattern::wrap_type<ov::op::v0::Concat>({pattern::any_input(), rhs_reshape}, concat_predicate);
+    auto rhs_concat_1 = pattern::wrap_type<ov::op::v0::Concat>({rhs_reshape, pattern::any_input()}, concat_predicate);
+    auto rhs_concat = std::make_shared<pattern::op::Or>(OutputVector{rhs_concat_0, rhs_concat_1});
+
+    auto rhs_reshape_or_concat = std::make_shared<pattern::op::Or>(OutputVector{rhs_reshape, rhs_concat});
+
+    auto rhs_bea_scalar = SCALAR_INPUT;
+    auto rhs_bea = pattern::wrap_type<ov::op::util::BinaryElementwiseArithmetic>(
+        {rhs_reshape_or_concat, rhs_bea_scalar},
+        pattern::consumers_count(1));
+
+    auto rhs_bea_or_concat = std::make_shared<pattern::op::Or>(OutputVector{rhs_reshape_or_concat, rhs_bea});
+    // END: symmetrical patterns for MatMul inputs
+
+    auto matmul =
+        pattern::wrap_type<ov::op::v0::MatMul>({lhs_bea_or_concat, rhs_bea_or_concat}, pattern::consumers_count(1));
+
+    auto add = pattern::wrap_type<ov::op::v1::Add>(
+        OutputVector{matmul, pattern::any_input()},
+        [](ov::Output<Node> out) -> bool {
+            if (!pattern::consumers_count(1)(out))
+                return false;
+            auto input_0_pshape = out.get_node_shared_ptr()->get_input_partial_shape(0);
+            auto input_1_pshape = out.get_node_shared_ptr()->get_input_partial_shape(1);
+            auto output_pshape = out.get_partial_shape();
+            ov::TensorLabel output_labels, input_0_labels, input_1_labels;
+            if (get_labels(input_0_pshape, input_0_labels) && get_labels(input_1_pshape, input_1_labels) &&
+                get_labels(output_pshape, output_labels)) {
+                if (input_0_pshape.size() != 3 || input_1_pshape.size() != 3 || output_pshape.size() != 3)
+                    return false;
+                return are_unique_and_equal_labels(input_0_labels, output_labels) ||
+                       are_unique_and_equal_labels(input_1_labels, output_labels);
+            } else {
+                return false;
+            }
+        });
+
+    auto matmul_or_add = std::make_shared<pattern::op::Or>(OutputVector{matmul, add});
+    auto final_reshape =
+        pattern::wrap_type<ov::op::v1::Reshape>({matmul_or_add, pattern::any_input()},
+                                                pattern::op::as_value_predicate([](std::shared_ptr<Node> n) -> bool {
+                                                    return reshape_keeps_last_two_dims(n);
+                                                }));
+
+    ov::matcher_pass_callback matcher_pass_callback = [=](pattern::Matcher& m) {
+        const auto& pm = m.get_pattern_map();
+        const auto& vm = m.get_pattern_value_map();
+        std::vector<ov::Node*> nodes_for_revalidation{pm.at(matmul).get()};
+        // reshapes check: BEGIN
+        // reshape_keeps_last_two_dims checks were already applied for all Reshapes in the pattern predicates
+        auto in_reshape_0 = pm.at(lhs_reshape);
+        auto in_reshape_1 = pm.at(rhs_reshape);
+        auto out_reshape = pm.at(final_reshape);
+        if (!batches_are_equal(in_reshape_0, in_reshape_1) ||
+            !batches_are_equal(in_reshape_0->get_output_partial_shape(0), out_reshape->get_input_partial_shape(0)) ||
+            !batches_are_equal(in_reshape_0->get_input_partial_shape(0),
+                               out_reshape->get_output_partial_shape(0),
+                               true)) {
+            return false;
+        }
+        // reshapes check: END
+
+        if (vm.count(add)) {
+            const auto& in_reshape_0_in_pshape = in_reshape_0->get_input_partial_shape(0);
+            if (in_reshape_0_in_pshape.size() != 4 || in_reshape_0_in_pshape[1].is_dynamic())
+                return false;
+            // we only allow the MatMul -> Add pattern to be optimized in the case of 4D -> 3D -> 4D DeReshaping
+        }
+
+        // preventing wrong matches
+        if (vm.count(lhs_concat) && !ov::as_type_ptr<ov::op::v0::Concat>(pm.at(lhs_concat)))
+            return false;
+        if (vm.count(rhs_concat) && !ov::as_type_ptr<ov::op::v0::Concat>(pm.at(rhs_concat)))
+            return false;
+
+        pull_reshape_through_optional_concat_and_bea(vm,
+                                                     lhs_concat,
+                                                     lhs_bea,
+                                                     in_reshape_0,
+                                                     pm.at(matmul)->input(0),
+                                                     nodes_for_revalidation);
+        pull_reshape_through_optional_concat_and_bea(vm,
+                                                     rhs_concat,
+                                                     rhs_bea,
+                                                     in_reshape_1,
+                                                     pm.at(matmul)->input(1),
+                                                     nodes_for_revalidation);
+
+        for (auto& node : nodes_for_revalidation)
+            node->validate_and_infer_types();
+
+        if (vm.count(add)) {
+            // TODO: make sure other elements of the shape are equal -- only those which aren't equal should be handled
+            auto add_node = pm.at(add);
+            size_t matmul_port = (add_node->input_value(0) == vm.at(matmul) ? 0 : 1);
+            size_t non_matmul_port = static_cast<size_t>(!matmul_port);
+
+            auto first_batch_dim =
+                ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(add_node->input_value(matmul_port),
+                                                                                   {0},
+                                                                                   {in_reshape_0, in_reshape_1});
+            auto divisor =
+                ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(in_reshape_0->input_value(0),
+                                                                                   {1},
+                                                                                   {in_reshape_0, in_reshape_1});
+            first_batch_dim = std::make_shared<ov::op::v1::Divide>(first_batch_dim, divisor, true);
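+            // Sketch of the intent (only the 4D -> 3D -> 4D case reaches this point): the non-MatMul input of the
+            // Add matched in the collapsed 3D layout, so below it is reshaped to [first_batch_dim, -1, <last two
+            // dims>] to keep the Add valid once the surrounding Reshapes are optimized out.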
+            auto minus_one = ov::op::v0::Constant::create(element::i64, {1}, {-1});
+            auto non_batch_dims = ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(
+                add_node->input_value(non_matmul_port),
+                {1, 2},
+                {in_reshape_0, in_reshape_1});
+            auto pattern =
+                std::make_shared<ov::op::v0::Concat>(OutputVector{first_batch_dim, minus_one, non_batch_dims}, 0);
+            auto other_input_reshape =
+                op::util::make_try_fold<ov::op::v1::Reshape>(add_node->input_value(non_matmul_port), pattern, true);
+            add_node->input(non_matmul_port).replace_source_output(other_input_reshape->output(0));
+            ov::copy_runtime_info({in_reshape_0, in_reshape_1}, {first_batch_dim, minus_one, other_input_reshape});
+            add_node->validate_and_infer_types();
+        }
+        ov::replace_output_update_name(out_reshape->output(0), out_reshape->input_value(0));
+        return true;
+    };
+
+    auto m = std::make_shared<pattern::Matcher>(final_reshape, matcher_name);
+    register_matcher(m, matcher_pass_callback);
+}
diff --git a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp
index 7451df397ba33c..4892f213f63871 100644
--- a/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp
+++ b/src/common/transformations/src/transformations/symbolic_transformations/symbolic_optimizations.cpp
@@ -6,16 +6,26 @@
 #include <openvino/core/dimension_tracker.hpp>
+#include <openvino/op/add.hpp>
+#include <openvino/op/reshape.hpp>
+#include <openvino/op/select.hpp>
+#include <openvino/op/softmax.hpp>
 #include <openvino/op/util/symbolic_info.hpp>
 #include <openvino/pass/manager.hpp>
 #include <openvino/pass/pattern/op/wrap_type.hpp>
 #include <transformations/common_optimizations/dimension_tracking.hpp>
 #include <transformations/common_optimizations/nop_elimination.hpp>
 #include <transformations/common_optimizations/shared_ops_optimization.hpp>
+#include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
 #include <transformations/symbolic_transformations/chained_maximum.hpp>
+#include <transformations/symbolic_transformations/dereshape_matmul.hpp>
 #include <transformations/symbolic_transformations/label_optimization.hpp>
 #include <transformations/symbolic_transformations/nop_broadcast.hpp>
 #include <transformations/symbolic_transformations/utils.hpp>
 
 #include "itt.hpp"
+#include "openvino/pass/pattern/op/or.hpp"
+
+using namespace ov::pass;
+using namespace ov::symbol::util;
 
 namespace {
 void symbolic_set_up_for_shape(ov::DimensionTracker& dt, ov::PartialShape& shape) {
@@ -116,6 +126,60 @@ bool ov::pass::SymbolicPropagation::run_on_model(const std::shared_ptr<ov::Model>& m) {
     return true;
 }
 
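+// Matches an Add -> Reshape -> Select -> Softmax -> Reshape chain; if the labels on the final Reshape equal the
+// labels of one of the Add inputs, neither the Add nor the Select broadcast that input, so the label information
+// can be propagated through their outputs (see the REGISTER_SYMBOLIC comment below).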
+ov::pass::LabelResolvingThroughSelect::LabelResolvingThroughSelect() {
+    MATCHER_SCOPE(LabelResolvingThroughSelect);
+    auto add = pattern::wrap_type<ov::op::v1::Add>();
+    auto input_reshape = pattern::wrap_type<ov::op::v1::Reshape>({add, pattern::any_input()});
+
+    auto select_then =
+        pattern::wrap_type<ov::op::v1::Select>({pattern::any_input(), input_reshape, pattern::any_input()});
+    auto select_else =
+        pattern::wrap_type<ov::op::v1::Select>({pattern::any_input(), pattern::any_input(), input_reshape});
+    auto select = std::make_shared<pattern::op::Or>(OutputVector{select_then, select_else});
+
+    auto softmax = pattern::wrap_type<ov::op::v8::Softmax>({select});
+    auto reshape = pattern::wrap_type<ov::op::v1::Reshape>({softmax, pattern::any_input()});
+
+    ov::matcher_pass_callback matcher_pass_callback = [=](pattern::Matcher& m) {
+        const auto& value_map = m.get_pattern_value_map();
+        ov::TensorLabel reshape_labels, add_0_labels, add_1_labels;
+        if (!get_labels(value_map.at(reshape).get_partial_shape(), reshape_labels))
+            return false;
+        auto add_node = value_map.at(add).get_node_shared_ptr();
+        auto add_0_pshape = add_node->input_value(0).get_partial_shape();
+        auto add_1_pshape = add_node->input_value(1).get_partial_shape();
+        if (!get_labels(add_0_pshape, add_0_labels) && !get_labels(add_1_pshape, add_1_labels))
+            return false;
+
+        if (are_unique_and_equal_labels(reshape_labels, add_0_labels)) {
+            // we detected that no broadcasting was done during binary elementwise and select, propagating labels
+            // through
+            add_node->set_output_type(0, add_node->get_output_element_type(0), add_0_pshape);
+        } else if (are_unique_and_equal_labels(reshape_labels, add_1_labels)) {
+            // we detected that no broadcasting was done during binary elementwise and select, propagating labels
+            // through
+            add_node->set_output_type(0, add_node->get_output_element_type(0), add_1_pshape);
+        } else {
+            return false;
+        }
+
+        std::shared_ptr<Node> select_node = nullptr;
+        if (value_map.count(select_then))
+            select_node = value_map.at(select_then).get_node_shared_ptr();
+        if (value_map.count(select_else))
+            select_node = value_map.at(select_else).get_node_shared_ptr();
+        if (select_node == nullptr)
+            return false;
+
+        const auto& reshape_pshape = value_map.at(input_reshape).get_partial_shape();
+        select_node->set_output_type(0, select_node->get_output_element_type(0), reshape_pshape);
+        value_map.at(softmax).get_node_shared_ptr()->validate_and_infer_types();
+        return true;
+    };
+
+    auto m = std::make_shared<pattern::Matcher>(reshape, matcher_name);
+    register_matcher(m, matcher_pass_callback);
+}
+
 ov::pass::SymbolicOptimizations::SymbolicOptimizations(bool full_run) {
     m_manager = std::make_shared<Manager>();
     m_manager->set_per_pass_validation(false);
@@ -134,7 +198,10 @@ ov::pass::SymbolicOptimizations::SymbolicOptimizations(bool full_run) {
     // transformations which use labels for optimizations
     REGISTER_SYMBOLIC(ApplyTableOfEquivalence)
     if (full_run) {
-        REGISTER_SYMBOLIC(OptimizeLabelsUsedAsValues)  // reduce shape sub-graphs
+        REGISTER_SYMBOLIC(OptimizeLabelsUsedAsValues)   // reduce shape sub-graphs
+        REGISTER_SYMBOLIC(LabelResolvingThroughSelect)  // figures out that broadcasting didn't happen through Select
+        REGISTER_SYMBOLIC(DeReshapeMatMul)
+        REGISTER_SYMBOLIC(SimplifyShapeOfSubGraph)
     }
 }
diff --git a/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp b/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp
index 3fedc3bd4c85be..32b572908c5fa5 100644
--- a/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp
+++ b/src/common/transformations/src/transformations/symbolic_transformations/utils.cpp
@@ -32,3 +32,20 @@ bool ov::symbol::util::are_unique_and_equal_labels(const ov::TensorLabel& lhs, c
         return false;
     return true;
 }
+
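+// Illustrative expectations (assumed dims; see the declaration in symbolic_transformations/utils.hpp): with
+// dynamic dimensions A and B sharing a label in the TableOfEquivalence,
+//   dims_are_equal(Dimension(3), Dimension(3))  -> true   (static values match)
+//   dims_are_equal(A, B)                        -> true   (labels are equal)
+//   dims_are_equal(Dimension::dynamic(), B)     -> false  (no label on lhs)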
+bool ov::symbol::util::dims_are_equal(const ov::Dimension& lhs, const ov::Dimension& rhs) {
+    bool labels_exist_and_equal = false;
+
+    auto lhs_label = ov::DimensionTracker::get_label(lhs);
+    auto rhs_label = ov::DimensionTracker::get_label(rhs);
+    auto table_l = ov::DimensionTracker::get_table_of_equivalence(lhs);
+    auto table_r = ov::DimensionTracker::get_table_of_equivalence(rhs);
+    if (table_l)
+        labels_exist_and_equal = lhs_label != ov::no_label && table_l->are_equal(lhs, rhs);
+    else if (table_r)
+        labels_exist_and_equal = lhs_label != ov::no_label && table_r->are_equal(lhs, rhs);
+    else
+        labels_exist_and_equal = lhs_label != ov::no_label && lhs_label == rhs_label;
+    bool dims_are_static_and_equal = lhs.is_static() && lhs == rhs;
+    return labels_exist_and_equal || dims_are_static_and_equal;
+}
diff --git a/src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp b/src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp
new file mode 100644
index 00000000000000..6e87090e488316
--- /dev/null
+++ b/src/common/transformations/tests/symbolic_transformations/dereshape_matmul.cpp
@@ -0,0 +1,399 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/symbolic_transformations/dereshape_matmul.hpp"
+
+#include <gtest/gtest.h>
+
+#include <openvino/core/model.hpp>
+#include <openvino/op/matmul.hpp>
+#include <openvino/op/parameter.hpp>
+#include <openvino/pass/manager.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "openvino/core/dimension_tracker.hpp"
+#include "transformations/utils/utils.hpp"
+
+using namespace ov;
+using namespace ov::op;
+using namespace std;
+
+namespace {
+/* Helps to organize dimension representation in the following tests:
+ * 1. Creates the requested number of dimensions
+ * 2. Labels them automatically
+ * 3. Creates a value representation for each dimension via a Parameter -> ShapeOf -> Gather sub-graph
+ * 4. Gives access to a dimension and its value representation via operator[]
+ * 5. Gives access to the utility Parameter via get_parameter -- only used for ov::Model creation in the tests
+ * */
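+/* Sketch of intended use (illustrative values only):
+ *   DimensionTestHelper dims(4);                   // four labeled dynamic dimensions, indices 0..3
+ *   auto pshape = dims.make_shape({0, 1, 2});      // PartialShape [d0, d1, d2]
+ *   auto r = dims.make_reshape(node, {0, 1, 2});   // Reshape of `node` to the value form of [d0, d1, d2]
+ */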
+class DimensionTestHelper {
+public:
+    struct DimensionWithOutput {
+        Dimension dim;
+        Output<Node> source;
+    };
+
+    explicit DimensionTestHelper(const size_t& num_dims) {
+        auto te = make_shared<ov::TableOfEquivalence>();
+        auto dt = ov::DimensionTracker(te);
+        auto dimensions = PartialShape::dynamic(Rank(num_dims));
+        dt.set_up_for_tracking(dimensions);
+        parameter = make_shared<v0::Parameter>(element::f32, dimensions);
+        for (size_t i = 0; i < num_dims; ++i)
+            m_map[i] = {dimensions[i],
+                        op::util::node_to_get_shape_value_of_indices_from_shape_source(parameter, {i})};
+    }
+
+    DimensionWithOutput operator[](size_t idx) const {
+        return m_map.at(idx);
+    }
+
+    ov::PartialShape make_shape(const vector<size_t>& dim_indices) const {
+        auto shape = PartialShape::dynamic(Rank(dim_indices.size()));
+        for (size_t i = 0; i < dim_indices.size(); ++i)
+            shape[i] = m_map.at(dim_indices[i]).dim;
+        return shape;
+    }
+
+    shared_ptr<Node> make_reshape(const Output<Node>& source, const vector<size_t>& dims_indices) const {
+        OutputVector sources(dims_indices.size());
+        for (size_t i = 0; i < dims_indices.size(); ++i)
+            sources[i] = m_map.at(dims_indices[i]).source;
+        auto concat = make_shared<v0::Concat>(sources, 0);
+        return make_shared<v1::Reshape>(source, concat, false);
+    }
+
+    std::shared_ptr<v0::Parameter> get_parameter() const {
+        return parameter;
+    }
+
+private:
+    std::shared_ptr<v0::Parameter> parameter;
+    std::map<size_t, DimensionWithOutput> m_map;
+};
+
+size_t max_element(const vector<vector<size_t>>& vectors) {
+    size_t current_max = 0;
+    for (const auto& vector : vectors)
+        current_max = max(current_max, *std::max_element(vector.begin(), vector.end()));
+    return current_max;
+}
+
+shared_ptr<Node> reshape(const Output<Node>& source,
+                         const vector<size_t>& dims_indices,
+                         const DimensionTestHelper& helper) {
+    OutputVector sources(dims_indices.size());
+    for (size_t i = 0; i < dims_indices.size(); ++i)
+        sources[i] = helper[dims_indices[i]].source;
+    auto concat = make_shared<v0::Concat>(sources, 0);
+    return make_shared<v1::Reshape>(source, concat, false);
+}
+
+ov::Output<ov::Node> get_shape_from_sources(const ov::Output<ov::Node>& batch_dims_source,
+                                            const ov::Output<ov::Node>& non_batch_dims_source) {
+    auto batch_indices = std::vector<size_t>(batch_dims_source.get_partial_shape().size() - 2);
+    std::iota(batch_indices.begin(), batch_indices.end(), 0);
+    auto batch_dims =
+        ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(batch_dims_source, batch_indices);
+    auto non_batch_indices = std::vector<size_t>(2);
+    std::iota(non_batch_indices.begin(),
+              non_batch_indices.end(),
+              non_batch_dims_source.get_partial_shape().size() - 2);
+    auto non_batch_dims =
+        ov::op::util::node_to_get_shape_value_of_indices_from_shape_source(non_batch_dims_source, non_batch_indices);
+    auto target_shape =
+        ov::op::util::make_try_fold<ov::op::v0::Concat>(ov::OutputVector{batch_dims, non_batch_dims}, 0);
+    return target_shape->output(0);
+}
+
+PartialShape make_concat_input_pshape(const DimensionTestHelper& dims, const vector<size_t>& dims_indices) {
+    auto another_pshape = dims.make_shape(dims_indices);
+    size_t rank = dims_indices.size();
+    // To keep the test graph small, we avoid changing the Concat axis dimension with this Concat
+    another_pshape[rank - 1] = Dimension(0);
+    return another_pshape;
+}
+
+static std::ostream& operator<<(std::ostream& os, const vector<size_t>& vals) {
+    bool first = true;
+    for (const auto& val : vals) {
+        if (!first)
+            os << "_";
+        first = false;
+        os << val;
+    }
+    return os;
+}
+}  // namespace
+
+using DeReshapeMatMulParameters =
+    tuple<tuple<vector<size_t>, vector<size_t>, vector<size_t>, vector<size_t>, vector<size_t>>,
+          size_t,
+          size_t,
+          size_t>;
+
+class DeReshapeMatMulTest : public TransformationTestsF,
+                            public testing::WithParamInterface<DeReshapeMatMulParameters> {
+public:
+    void SetUp() override {
+        TransformationTestsF::SetUp();
+        const auto& params = std::get<0>(GetParam());
+
+        const auto& lhs_shape_idx = std::get<0>(params);
+        const auto& lhs_reshape_idx = std::get<1>(params);
+        const auto& rhs_shape_idx = std::get<2>(params);
+        const auto& rhs_reshape_idx = std::get<3>(params);
+        const auto& out_reshape_idx = std::get<4>(params);
+
+        // 0 - no bea, 1 - bea on lhs, 2 - bea on rhs, 3 - bea on lhs and rhs
+        const size_t& bea_scalar_mode = std::get<1>(GetParam());
+
+        // 0 - no concat
+        // 10 - concat on lhs, reshape on 0 port
+        // 11 - concat on lhs, reshape on 1 port
+        // 20 - concat on rhs, reshape on 0 port
+        // 21 - concat on rhs, reshape on 1 port
+        // 300 - concat on both sides, both reshapes on 0 ports of the concats
+        // 301 - concat on both sides, lhs reshape on 0 port, rhs reshape on 1 port
+        // 310 - concat on both sides, lhs reshape on 1 port, rhs reshape on 0 port
+        // 311 - concat on both sides, both reshapes on 1 ports of the concats
+        const size_t& concat_mode = std::get<2>(GetParam());
+
+        // 0 - no add, 1 - add has matmul on lhs, 2 - add has matmul on rhs
+        const size_t& final_add_mode = std::get<3>(GetParam());
+
+        const auto& max_idx =
+            max_element({lhs_shape_idx, rhs_shape_idx, lhs_reshape_idx, rhs_reshape_idx, out_reshape_idx});
+        const DimensionTestHelper dims(max_idx + 1);
+
+        PartialShape lhs_original_pshape = dims.make_shape(lhs_shape_idx);
+        PartialShape rhs_original_pshape = dims.make_shape(rhs_shape_idx);
+
+        get_model(dims,
+                  lhs_original_pshape,
+                  rhs_original_pshape,
+                  lhs_reshape_idx,
+                  rhs_reshape_idx,
+                  out_reshape_idx,
+                  bea_scalar_mode,
+                  concat_mode,
+                  final_add_mode);
+        manager.register_pass<ov::pass::DeReshapeMatMul>();
+        get_model_ref(dims,
+                      lhs_original_pshape,
+                      rhs_original_pshape,
+                      lhs_reshape_idx,
+                      rhs_reshape_idx,
+                      bea_scalar_mode,
+                      concat_mode,
+                      final_add_mode);
+    }
+
+    void get_model(const DimensionTestHelper& dims,
+                   const PartialShape& lhs_original_pshape,
+                   const PartialShape& rhs_original_pshape,
+                   const vector<size_t>& lhs_reshape_idx,
+                   const vector<size_t>& rhs_reshape_idx,
+                   const vector<size_t>& out_reshape_idx,
+                   const size_t& bea_scalar_mode,
+                   const size_t& concat_mode,
+                   const size_t& final_add_mode) {
+        ParameterVector inputs;
+        OutputVector outputs;
+
+        // LHS input of MatMul
+        auto lhs_input = make_shared<v0::Parameter>(element::f32, lhs_original_pshape);
+        auto lhs_output = dims.make_reshape(lhs_input, lhs_reshape_idx);
+
+        if (set<size_t>{10, 11, 300, 301, 310, 311}.count(concat_mode)) {
+            const auto& another_pshape = make_concat_input_pshape(dims, lhs_reshape_idx);
+            const auto& another_input = make_shared<v0::Parameter>(element::f32, another_pshape);
+
+            if (set<size_t>{10, 300, 301}.count(concat_mode)) {  // reshape on 0 port
+                lhs_output = make_shared<v0::Concat>(OutputVector{lhs_output, another_input}, -1);
+            } else if (set<size_t>{11, 310, 311}.count(concat_mode)) {  // reshape on 1 port
+                lhs_output = make_shared<v0::Concat>(OutputVector{another_input, lhs_output}, -1);
+            } else {
+                ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode;
+            }
+            inputs.push_back(another_input);
+            outputs.emplace_back(lhs_output);
+        }
+
+        if (bea_scalar_mode == 1 || bea_scalar_mode == 3)
+            lhs_output = make_shared<v1::Multiply>(lhs_output, v0::Constant::create(element::f32, {}, {0.125}));
+
+        // RHS input of MatMul
+        auto rhs_input = make_shared<v0::Parameter>(element::f32, rhs_original_pshape);
+        auto rhs_output = dims.make_reshape(rhs_input, rhs_reshape_idx);
+
+        if (set<size_t>{20, 21, 300, 301, 310, 311}.count(concat_mode)) {
+            const auto& another_pshape = make_concat_input_pshape(dims, rhs_reshape_idx);
+            const auto& another_input = make_shared<v0::Parameter>(element::f32, another_pshape);
+            if (set<size_t>{20, 300, 310}.count(concat_mode)) {  // reshape on 0 port
+                rhs_output = make_shared<v0::Concat>(OutputVector{rhs_output, another_input}, -1);
+            } else if (set<size_t>{21, 301, 311}.count(concat_mode)) {  // reshape on 1 port
+                rhs_output = make_shared<v0::Concat>(OutputVector{another_input, rhs_output}, -1);
+            } else {
+                ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode;
+            }
+            inputs.push_back(another_input);
+            outputs.emplace_back(rhs_output);
+        }
+
+        if (bea_scalar_mode == 2 || bea_scalar_mode == 3)
+            rhs_output = make_shared<v1::Multiply>(rhs_output, v0::Constant::create(element::f32, {}, {0.125}));
+
+        Output<Node> matmul = make_shared<v0::MatMul>(lhs_output, rhs_output);
+
+        if (final_add_mode == 1)  // 1 - add has matmul on lhs
+            matmul = make_shared<v1::Add>(matmul,
+                                          v0::Constant::create(element::f32, Shape(lhs_reshape_idx.size(), 1), {1}));
+        else if (final_add_mode == 2)  // 2 - add has matmul on rhs
+            matmul = make_shared<v1::Add>(v0::Constant::create(element::f32, Shape(lhs_reshape_idx.size(), 1), {1}),
+                                          matmul);
+
+        auto output_reshape = reshape(matmul, out_reshape_idx, dims);
+
+        inputs.push_back(dims.get_parameter());
+        inputs.push_back(lhs_input);
+        inputs.push_back(rhs_input);
+        outputs.emplace_back(output_reshape);
+
+        // flatten every tracked output and Concat them into a single result to keep the model single-output
+        for (auto& output : outputs)
+            output = std::make_shared<v1::Reshape>(output, v0::Constant::create(element::i32, {1}, {-1}), false);
+        auto output = make_shared<v0::Concat>(outputs, 0);
+        model = make_shared<Model>(output, inputs, "Tested model");
+    }
+
+    void get_model_ref(const DimensionTestHelper& dims,
+                       const PartialShape& lhs_original_pshape,
+                       const PartialShape& rhs_original_pshape,
+                       const vector<size_t>& lhs_reshape_idx,
+                       const vector<size_t>& rhs_reshape_idx,
+                       const size_t& bea_scalar_mode,
+                       const size_t& concat_mode,
+                       const size_t& final_add_mode) {
+        ParameterVector inputs;
+        OutputVector outputs;
+
+        // LHS input of MatMul
+        auto lhs_input = make_shared<v0::Parameter>(element::f32, lhs_original_pshape);
+        auto lhs_output = lhs_input->output(0);
+
+        if (set<size_t>{10, 11, 300, 301, 310, 311}.count(concat_mode)) {
+            const auto& another_pshape = make_concat_input_pshape(dims, lhs_reshape_idx);
+            const auto& another_input = make_shared<v0::Parameter>(element::f32, another_pshape);
+
+            auto target_shape_of_input = get_shape_from_sources(lhs_output, another_input);
+            auto input_reshape = make_shared<v1::Reshape>(another_input, target_shape_of_input, false);
+
+            if (set<size_t>{10, 300, 301}.count(concat_mode)) {  // reshape on 0 port
+                lhs_output = make_shared<v0::Concat>(OutputVector{lhs_output, input_reshape}, -1);
+            } else if (set<size_t>{11, 310, 311}.count(concat_mode)) {  // reshape on 1 port
+                lhs_output = make_shared<v0::Concat>(OutputVector{input_reshape, lhs_output}, -1);
+            } else {
+                ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode;
+            }
+
+            auto target_shape_of_output = get_shape_from_sources(input_reshape->input_value(0), lhs_output);
+            auto output_reshape = make_shared<v1::Reshape>(lhs_output, target_shape_of_output, false);
+
+            inputs.push_back(another_input);
+            outputs.emplace_back(output_reshape);
+        }
+
+        if (bea_scalar_mode == 1 || bea_scalar_mode == 3)
+            lhs_output = make_shared<v1::Multiply>(lhs_output, v0::Constant::create(element::f32, {}, {0.125}));
+
+        // RHS input of MatMul
+        auto rhs_input = make_shared<v0::Parameter>(element::f32, rhs_original_pshape);
+        auto rhs_output = rhs_input->output(0);
+
+        if (set<size_t>{20, 21, 300, 301, 310, 311}.count(concat_mode)) {
+            const auto& another_pshape = make_concat_input_pshape(dims, rhs_reshape_idx);
+            const auto& another_input = make_shared<v0::Parameter>(element::f32, another_pshape);
+
+            auto target_shape_of_input = get_shape_from_sources(rhs_output, another_input);
+            auto input_reshape = make_shared<v1::Reshape>(another_input, target_shape_of_input, false);
+
+            if (set<size_t>{20, 300, 310}.count(concat_mode)) {  // reshape on 0 port
+                rhs_output = make_shared<v0::Concat>(OutputVector{rhs_output, input_reshape}, -1);
+            } else if (set<size_t>{21, 301, 311}.count(concat_mode)) {  // reshape on 1 port
+                rhs_output = make_shared<v0::Concat>(OutputVector{input_reshape, rhs_output}, -1);
+            } else {
+                ASSERT_TRUE(false) << "Unknown mode of concat: " << concat_mode;
+            }
+            auto target_shape_of_output = get_shape_from_sources(input_reshape->input_value(0), rhs_output);
+            auto output_reshape = make_shared<v1::Reshape>(rhs_output, target_shape_of_output, false);
+
+            inputs.push_back(another_input);
+            outputs.emplace_back(output_reshape);
+        }
+
+        if (bea_scalar_mode == 2 || bea_scalar_mode == 3)
+            rhs_output = make_shared<v1::Multiply>(rhs_output, v0::Constant::create(element::f32, {}, {0.125}));
+
+        Output<Node> matmul = make_shared<v0::MatMul>(lhs_output, rhs_output);
+
+        if (final_add_mode == 1)  // 1 - add has matmul on lhs
+            matmul = make_shared<v1::Add>(matmul,
+                                          v0::Constant::create(element::f32, Shape(lhs_reshape_idx.size(), 1), {1}));
+        else if (final_add_mode == 2)  // 2 - add has matmul on rhs
+            matmul = make_shared<v1::Add>(v0::Constant::create(element::f32, Shape(lhs_reshape_idx.size(), 1), {1}),
+                                          matmul);
+
+        inputs.push_back(dims.get_parameter());
+        inputs.push_back(lhs_input);
+        inputs.push_back(rhs_input);
+        outputs.emplace_back(matmul);
+
+        for (auto& output : outputs)
+            output = std::make_shared<v1::Reshape>(output, v0::Constant::create(element::i32, {1}, {-1}), false);
+        auto output = make_shared<v0::Concat>(outputs, 0);
+
+        model_ref = make_shared<Model>(output, inputs, "Reference model");
+    }
+
+    static std::string getTestCaseName(const testing::TestParamInfo<DeReshapeMatMulParameters>& obj) {
+        vector<size_t> lhs_input_shape_indices, lhs_reshape_indices;
+        vector<size_t> rhs_input_shape_indices, rhs_reshape_indices;
+        vector<size_t> output_reshape_indices;
+        size_t bea_scalar_mode, concat_mode, final_add_mode;
+
+        tuple<vector<size_t>, vector<size_t>, vector<size_t>, vector<size_t>, vector<size_t>> tmp;
+
+        std::tie(tmp, bea_scalar_mode, concat_mode, final_add_mode) = obj.param;
+        std::tie(lhs_input_shape_indices,
+                 lhs_reshape_indices,
+                 rhs_input_shape_indices,
+                 rhs_reshape_indices,
+                 output_reshape_indices) = tmp;
+
+        std::ostringstream result;
+        result << "l_in_shape_idx=" << lhs_input_shape_indices << "_l_reshape_idx=" << lhs_reshape_indices
+               << "_r_in_shape_idx=" << rhs_input_shape_indices << "_r_reshape_idx=" << rhs_reshape_indices
+               << "_out_reshape_idx=" << output_reshape_indices << "_bea_scalar_mode=" << bea_scalar_mode
+               << "_concat_mode=" << concat_mode << "_final_add_mode=" << final_add_mode;
+        return result.str();
+    }
+};
+
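+// Each test case lists dimension indices (resolved through DimensionTestHelper) for: lhs input shape,
+// lhs reshape target, rhs input shape, rhs reshape target, and the final output reshape target; e.g.
+// {0, 1, 2, 3} stands for shape [d0, d1, d2, d3], so shared indices model equal labeled dimensions.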
+const auto shape_test_cases =
+    vector<tuple<vector<size_t>, vector<size_t>, vector<size_t>, vector<size_t>, vector<size_t>>>{
+        {{0, 1, 2, 3}, {5, 2, 3}, {0, 1, 3, 4}, {5, 3, 4}, {0, 1, 2, 4}},                 // 4D -> 3D -> 4D
+        {{5, 2, 3}, {0, 1, 2, 3}, {5, 3, 4}, {0, 1, 3, 4}, {5, 2, 4}},                    // 3D -> 4D -> 3D
+        {{0, 1, 2, 3, 4}, {0, 6, 3, 4}, {0, 1, 2, 4, 5}, {0, 6, 4, 5}, {0, 1, 2, 3, 5}},  // 5D -> 4D -> 5D
+    };
+
+const auto bea_scalar_modes = vector<size_t>{0, 1, 2, 3};
+const auto concat_modes = vector<size_t>{0, 10, 11, 20, 21, 300, 301, 310, 311};
+const auto final_add_modes = vector<size_t>{0, 1, 2};
+
+TEST_P(DeReshapeMatMulTest, DeReshapeTests) {}
+
+INSTANTIATE_TEST_SUITE_P(
+    TransformationTestsF,
+    DeReshapeMatMulTest,
+    testing::Combine(
+        testing::ValuesIn(shape_test_cases),  // lhs_shape_idx, lhs_reshape_idx, rhs_shape_idx, rhs_reshape_idx,
+                                              // out_reshape_idx
+        testing::ValuesIn(bea_scalar_modes),
+        testing::ValuesIn(concat_modes),
+        testing::ValuesIn(final_add_modes)),
+    DeReshapeMatMulTest::getTestCaseName);