[GPU] sink reshape for reorder+reshape+permute pattern opt #28183

Merged
23 commits
2912c2a
initial trial
songbell Dec 17, 2024
59a00b0
sink reshape for further optimization
songbell Dec 23, 2024
a827247
clean up code
songbell Dec 23, 2024
58aafd5
optimize code
songbell Dec 24, 2024
293a707
relaxed pattern match to allow reorder not in place
songbell Dec 24, 2024
98e39cc
use random values in test
songbell Dec 25, 2024
561d1bb
Update src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transf…
songbell Jan 3, 2025
b3437ba
opt the recursive function for more strict pattern check
songbell Jan 3, 2025
7d27b93
Merge branch 'bell/fuse_reorder_reshape_transpose' of https://github.…
songbell Jan 3, 2025
aec66d3
Merge branch 'master' into bell/fuse_reorder_reshape_transpose
peterchen-intel Jan 6, 2025
0995188
Merge branch 'master' into bell/fuse_reorder_reshape_transpose
songbell Jan 13, 2025
b74aa66
more strict check of reshape node
songbell Jan 14, 2025
878583d
Merge branch 'bell/fuse_reorder_reshape_transpose' of https://github.…
songbell Jan 14, 2025
593097f
typo
songbell Jan 14, 2025
bbdba1c
move to ngraph transformation
songbell Jan 16, 2025
e3ca2ae
Merge branch 'master' into bell/fuse_reorder_reshape_transpose
songbell Jan 16, 2025
65e24b6
correct copyright
songbell Jan 16, 2025
7cc8153
apply review comments
songbell Jan 20, 2025
e47ed87
Merge branch 'master' into bell/fuse_reorder_reshape_transpose
songbell Jan 23, 2025
b13fff4
Merge branch 'master' into bell/fuse_reorder_reshape_transpose
songbell Feb 5, 2025
c15e88c
apply review comments
songbell Feb 6, 2025
0b4682c
Merge branch 'bell/fuse_reorder_reshape_transpose' of https://github.…
songbell Feb 6, 2025
7230aac
Merge branch 'master' into bell/fuse_reorder_reshape_transpose
songbell Feb 6, 2025
165 changes: 165 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations/sink_reshape.cpp
@@ -0,0 +1,165 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "sink_reshape.hpp"

#include "intel_gpu/op/convolution.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/utils/utils.hpp"

namespace ov {
namespace intel_gpu {

SinkReshape::SinkReshape() {
using namespace ov::pass::pattern;
using ov::pass::pattern::op::Or;
using namespace ov::op;

auto reshape_predicate = [](const ov::Output<ov::Node>& output) -> bool {
auto supported_conv_act_post_ops_for_fuse = [](const std::shared_ptr<const Node>& node) -> bool {
return ov::is_type<v0::Relu>(node) || ov::is_type<v0::Elu>(node) || ov::is_type<v0::Sigmoid>(node) ||
ov::is_type<v5::HSigmoid>(node) || ov::is_type<v0::Clamp>(node) || ov::is_type<v4::Swish>(node) ||
ov::is_type<v4::HSwish>(node) || ov::is_type<v4::Mish>(node) || ov::is_type<v5::Round>(node);
};
auto supported_conv_eltwise_post_ops_for_fuse = [](const std::shared_ptr<const Node>& node) -> bool {
if (ov::is_type<v1::Add>(node) || ov::is_type<v1::Subtract>(node) || ov::is_type<v1::Multiply>(node) ||
ov::is_type<v1::Divide>(node))
return std::dynamic_pointer_cast<v0::Constant>(node->get_input_node_shared_ptr(1)) != nullptr;
return ov::is_type<v0::Exp>(node);
};
std::function<bool(const std::shared_ptr<ov::Node>&)> is_suitable_parent;
is_suitable_parent = [&](const std::shared_ptr<ov::Node>& node) -> bool {
if (node->get_users().size() != 1 || node->is_dynamic())
return false;
if (ov::as_type_ptr<op::Convolution>(node))
return true;
for (size_t idx = 0; idx < node->get_input_size(); idx++) {
auto input = node->get_input_node_shared_ptr(idx);
if (ov::as_type_ptr<v0::Constant>(input))
continue;
if (supported_conv_eltwise_post_ops_for_fuse(node)) {
return is_suitable_parent(input);
} else if (supported_conv_act_post_ops_for_fuse(node)) {
return is_suitable_parent(input);
}
return false;
}
return false;
};
// reshape is supported in only one case: two consecutive input dims merged into one
auto is_suitable_reshape = [](const std::shared_ptr<ov::Node>& node) -> bool {
if (node->is_dynamic())
return false;
auto& in_ps = node->get_input_partial_shape(0);
auto& out_ps = node->get_output_partial_shape(0);
if (in_ps.size() - out_ps.size() != 1)
return false;
size_t mismatch_count = 0;
for (size_t i = 0; i < out_ps.size(); ++i) {
if (i + mismatch_count >= in_ps.size())
return false;
if (out_ps[i] != in_ps[i + mismatch_count]) {
mismatch_count++;
}
}
return mismatch_count == 1;
};
const auto reshape = ov::as_type_ptr<v1::Reshape>(output.get_node_shared_ptr());
return is_suitable_reshape(reshape) && is_suitable_parent(reshape->get_input_node_shared_ptr(0));
};

auto reshape_m = wrap_type<v1::Reshape>(reshape_predicate);
auto transpose_const_m = wrap_type<v0::Constant>();
auto transpose_m = wrap_type<v1::Transpose>({reshape_m, transpose_const_m});

ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();
auto reshape = std::dynamic_pointer_cast<v1::Reshape>(pattern_map.at(reshape_m).get_node_shared_ptr());
if (!reshape || transformation_callback(reshape)) {
return false;
}

auto update_order = [](std::vector<uint16_t> original_order, const std::shared_ptr<v1::Reshape>& reshape_node) {
// Example: for this sequence, the Reshape node merges two consecutive dims into one,
// so the order must be updated as if the permute were performed before the reshape:
// [1,3,4,6] -> Reshape [1,3,24] -> permute(0,2,1) -> [1,24,3]
// the updated order must be (0,2,3,1):
// the dim with index=2 is split into two parts: 2 and 3
auto reshape_in_shape = reshape_node->get_input_partial_shape(0).to_shape();
auto reshape_out_shape = reshape_node->get_output_partial_shape(0).to_shape();
auto transformed_order = original_order;
ov::Shape new_shape(transformed_order.size());
const uint16_t merge_dim_idx = [&]() {
for (uint16_t i = 0; i < reshape_out_shape.size(); ++i) {
if (reshape_in_shape[i] != reshape_out_shape[i])
return i;
}
OPENVINO_THROW("same input/output for reshape node");
}();
auto insertIt = transformed_order.end();
for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) {
auto& elem = *it;
if (elem > merge_dim_idx) {
elem++;
} else if (elem == merge_dim_idx) {
insertIt = it + 1;
}
}
transformed_order.insert(insertIt, merge_dim_idx + 1);
return transformed_order;
};

// allow only transposes which rotate the feature dim to the back, so that it becomes the inner-most axis
auto check_transpose_order = [](std::vector<uint16_t>& order) -> bool {
if (order.size() <= 2)
return false;
if ((int32_t)order[order.size() - 2] != order.size() - 1)
return false;
if ((int32_t)order[0] != 0)
return false;
for (int32_t i = 2; i < (int32_t)order.size(); ++i) {
if ((int32_t)order[i - 1] != i)
return false;
}
return true;
};

auto transpose = std::dynamic_pointer_cast<v1::Transpose>(pattern_map.at(transpose_m).get_node_shared_ptr());
if (pattern_map.count(transpose_const_m) > 0) {
auto org_transpose_m = pattern_map.at(transpose_const_m).get_node_shared_ptr();
auto org_transpose_os = transpose->get_output_shape(0);
auto tranpose_order = std::dynamic_pointer_cast<v0::Constant>(org_transpose_m);
auto updated_order = update_order(tranpose_order->cast_vector<uint16_t>(), reshape);
if (check_transpose_order(updated_order)) {
auto updated_transpose_order = std::make_shared<v0::Constant>(tranpose_order->get_element_type(),
ov::Shape(1, updated_order.size()),
updated_order);
updated_transpose_order->set_friendly_name(tranpose_order->get_friendly_name() + "_updated");
auto new_transpose =
std::make_shared<v1::Transpose>(reshape->input(0).get_source_output(), updated_transpose_order);
new_transpose->set_friendly_name(transpose->get_friendly_name() + "_with_updated_order");
copy_runtime_info(transpose, new_transpose);
ov::replace_node(reshape, new_transpose);
auto new_pattern_const = std::make_shared<ov::op::v0::Constant>(ov::element::i32,
ov::Shape{org_transpose_os.size()},
org_transpose_os);
auto new_reshape = std::make_shared<ov::op::v1::Reshape>(new_transpose,
new_pattern_const,
reshape->get_special_zero());
new_reshape->set_friendly_name(reshape->get_friendly_name() + "_sinked_after_transpose");
copy_runtime_info(reshape, new_reshape);
ov::replace_node(transpose, new_reshape);
}
}
return true;
};
auto m = std::make_shared<ov::pass::pattern::Matcher>(transpose_m, "SinkReshapeIfNeeded");
this->register_matcher(m, callback);
}
} // namespace intel_gpu
} // namespace ov
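
For reference, a minimal standalone sketch (plain C++, not part of the diff) of the order-update logic described in the comments above; the helper name update_order_sketch is illustrative only:

// Standalone sketch of the order update: given a reshape that merges two consecutive
// input dims into one (e.g. [1,3,4,6] -> [1,3,24]) followed by a transpose with `order`,
// compute the order of an equivalent transpose placed *before* the reshape.
#include <cstdint>
#include <iostream>
#include <vector>

static std::vector<uint16_t> update_order_sketch(std::vector<uint16_t> order, size_t merge_dim_idx) {
    // Every axis index after the merged dim shifts by one; the merged dim itself is
    // split into (merge_dim_idx, merge_dim_idx + 1), kept adjacent in the new order.
    auto insert_it = order.end();
    for (auto it = order.begin(); it != order.end(); ++it) {
        if (*it > merge_dim_idx) {
            ++(*it);
        } else if (*it == merge_dim_idx) {
            insert_it = it + 1;
        }
    }
    order.insert(insert_it, static_cast<uint16_t>(merge_dim_idx + 1));
    return order;
}

int main() {
    // [1,3,4,6] -> Reshape [1,3,24] -> permute(0,2,1): dims 2 and 3 were merged into dim 2.
    const auto updated = update_order_sketch({0, 2, 1}, /*merge_dim_idx=*/2);
    for (auto d : updated) std::cout << d << ' ';  // prints: 0 2 3 1
    std::cout << '\n';
    return 0;
}

Compiled as ordinary C++11, this prints 0 2 3 1, matching the (0,2,3,1) order from the example in the pass.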
19 changes: 19 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations/sink_reshape.hpp
@@ -0,0 +1,19 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace intel_gpu {

class SinkReshape: public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("SinkReshapeIfNeeded");
SinkReshape();
};

} // namespace intel_gpu
} // namespace ov
src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -86,6 +86,7 @@
#include "plugin/transformations/dynamic_quantize_fully_connected.hpp"
#include "plugin/transformations/optimize_subsequent_reshapes.hpp"
#include "plugin/transformations/lora_horizontal_fusion.hpp"
#include "plugin/transformations/sink_reshape.hpp"
#include "transformations/common_optimizations/nop_elimination.hpp"
#include "transformations/common_optimizations/rms_fusion.hpp"
#include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp"
@@ -1097,6 +1098,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
// This Validate is needed for proper data type propagation after applying IncreasePositionIdsPrecision pass
manager.register_pass<ov::pass::Validate>();

manager.register_pass<ov::intel_gpu::SinkReshape>();

if (device_info.supports_immad) {
auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size);
pass_config->set_callback<ov::intel_gpu::DynamicQuantizeFullyConnected>([=](const_node_ptr& root) -> bool {
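
A usage note, as a minimal sketch that mirrors how the unit test below drives the pass (assumes the GPU plugin headers are on the include path; the helper name run_sink_reshape is illustrative):

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "plugin/transformations/sink_reshape.hpp"

// SinkReshape is a regular ov::pass::MatcherPass, so besides the plugin pipeline above
// it can also be run through a standalone pass manager, exactly as the unit test does.
void run_sink_reshape(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::intel_gpu::SinkReshape>();
    manager.run_passes(model);
}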
125 changes: 125 additions & 0 deletions (new GPU plugin unit test for SinkReshape)
@@ -0,0 +1,125 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>

#include <string>
#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/opsets/opset1.hpp>
#include "openvino/op/softmax.hpp"
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "plugin/transformations/sink_reshape.hpp"
#include "plugin/transformations/convert_convolution.hpp"
#include "common_test_utils/ov_test_utils.hpp"

using namespace testing;
using namespace ov::intel_gpu;

using SinkReshapeParams = std::tuple<bool, // add eltwise
bool, // add activation
bool, // eligible rotation
bool>; // eligible reshape

class SinkReshapeTests : public TransformationTestsF, public WithParamInterface<SinkReshapeParams> {
public:
static std::string get_test_case_name(testing::TestParamInfo<SinkReshapeParams> obj) {
std::pair<ov::PartialShape, ov::Shape> input_shapes;
bool add_eltwise;
bool add_activation;
bool eligible_rotation;
bool eligible_reshape;
std::tie(add_eltwise, add_activation, eligible_rotation, eligible_reshape) = obj.param;

std::ostringstream result;
result << "add_eltwise=" << add_eltwise << "_add_activation=" << add_activation << "_eligible_rotation=" << eligible_rotation << "_eligible_reshape=" << eligible_reshape;
return result.str();
}

static std::shared_ptr<ov::Model> init_model(const bool add_eltwise,
const bool add_activation,
const bool eligible_rotation,
const bool eligible_reshape,
const bool ref) {
ov::Strides strides{1, 1};
ov::Strides dilations{1, 1};
ov::CoordinateDiff pads_begin{0, 0};
ov::CoordinateDiff pads_end{0, 0};
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{ 2, 3, 12, 12 });
auto weights_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{ 4, 3, 3, 3 }, { 1 });
auto conv = std::make_shared<ov::op::v1::Convolution>(input,
weights_const,
strides,
pads_begin,
pads_end,
dilations,
ov::op::PadType::EXPLICIT);
std::shared_ptr<ov::Node> reshape_input_node = conv;
if (add_eltwise) {
auto sub_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1}, {1});
reshape_input_node = std::make_shared<ov::opset1::Subtract>(reshape_input_node, sub_const);
}

if (add_activation) {
reshape_input_node = std::make_shared<ov::opset1::Sigmoid>(reshape_input_node);
}
std::shared_ptr<ov::Model> model = nullptr;
if (!ref) {
auto shape = eligible_reshape ? std::vector<int>{2, 4, 100} : std::vector<int>{2, 2, 200};
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, shape);
auto reshape = std::make_shared<ov::opset1::Reshape>(reshape_input_node, reshape_const, true);
auto order = eligible_rotation ? std::vector<int>{0, 2, 1} : std::vector<int>{2, 1, 0};
auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, order);
auto transpose = std::make_shared<ov::opset1::Transpose>(reshape, transpose_const);

auto softmax = std::make_shared<ov::op::v8::Softmax>(transpose);
model = std::make_shared<ov::Model>(ov::NodeVector{softmax}, ov::ParameterVector{input});
} else {
auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {4}, {0, 2, 3, 1});
auto transpose = std::make_shared<ov::opset1::Transpose>(reshape_input_node, transpose_const);
auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, {2, 100, 4});
auto reshape = std::make_shared<ov::opset1::Reshape>(transpose, reshape_const, true);
auto softmax = std::make_shared<ov::op::v8::Softmax>(reshape);
model = std::make_shared<ov::Model>(ov::NodeVector{softmax}, ov::ParameterVector{input});
}
ov::pass::Manager manager;
manager.register_pass<ConvertConvolutionToInternal>();
if (!ref)
manager.register_pass<SinkReshape>();
manager.run_passes(model);
return model;
}

protected:
void SetUp() override {
TransformationTestsF::SetUp();
bool add_eltwise;
bool add_activation;
bool eligible_rotation;
bool eligible_reshape;
std::tie(add_eltwise, add_activation, eligible_rotation, eligible_reshape) = this->GetParam();

model = init_model(add_eltwise, add_activation, eligible_rotation, eligible_reshape, true);
if (!eligible_rotation || !eligible_reshape)
model_ref = model->clone();
else
model_ref = init_model(add_eltwise, add_activation, eligible_rotation, eligible_reshape, false);
}
};

TEST_P(SinkReshapeTests, CompareFunctions) {}

const std::vector<bool> add_eltwise = {false, true};
const std::vector<bool> add_activation = {false, true};
const std::vector<bool> eligible_rotation = {false, true};
const std::vector<bool> eligible_reshape = {false, true};

INSTANTIATE_TEST_SUITE_P(smoke_TransformationTests_reshape_transpose, SinkReshapeTests,
::testing::Combine(
::testing::ValuesIn(add_eltwise),
::testing::ValuesIn(add_activation),
::testing::ValuesIn(eligible_rotation),
::testing::ValuesIn(eligible_reshape)),
SinkReshapeTests::get_test_case_name);
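
For the eligible test configuration, a small standalone sketch (plain C++, illustrative only) of the shape bookkeeping these graphs rely on, assuming the convolution output shape [2,4,10,10]:

// Shape trace for the eligible test case:
//   original:  [2,4,10,10] -> Reshape [2,4,100] -> Transpose(0,2,1) -> [2,100,4]
//   rewritten: [2,4,10,10] -> Transpose(0,2,3,1) -> [2,10,10,4] -> Reshape -> [2,100,4]
#include <cassert>
#include <cstddef>
#include <vector>

static std::vector<size_t> permute(const std::vector<size_t>& shape, const std::vector<size_t>& order) {
    std::vector<size_t> out;
    for (auto axis : order) out.push_back(shape[axis]);
    return out;
}

int main() {
    const std::vector<size_t> conv_out{2, 4, 10, 10};
    // Original subgraph: reshape first, then transpose.
    const std::vector<size_t> reshaped{2, 4, 100};
    const auto original_out = permute(reshaped, {0, 2, 1});    // [2,100,4]
    // Subgraph produced by SinkReshape: transpose first, then reshape.
    const auto transposed = permute(conv_out, {0, 2, 3, 1});   // [2,10,10,4]
    const std::vector<size_t> rewritten_out{2, 100, 4};        // reshape pattern emitted by the pass
    assert(original_out == rewritten_out);
    assert(transposed[1] * transposed[2] == rewritten_out[1]); // 10*10 merged into 100
    return 0;
}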