[GPU] sink reshape for reorder+reshape+permute pattern opt #28183
Merged: yeonbok merged 23 commits into openvinotoolkit:master from songbell:bell/fuse_reorder_reshape_transpose on Feb 11, 2025.
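The pass addresses the reorder + reshape + permute pattern in the GPU plugin: when a static Reshape that merges two consecutive dimensions feeds a Transpose that rotates the feature dimension to the innermost axis, the Reshape is sunk below the Transpose, so the permutation applies directly to the convolution output. A minimal before/after sketch (illustration only, not part of the PR; shapes are borrowed from the unit test below, and the pass additionally requires the Reshape's producer to be a convolution, optionally followed by fusable eltwise/activation post-ops):

```cpp
// Illustration only (mirrors the unit test below): the "before" and "after"
// graphs produce identical results for any input.
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/opsets/opset1.hpp"

std::shared_ptr<ov::Model> before_graph() {
    auto input = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 4, 10, 10});
    // Reshape merges the two spatial dims: [2,4,10,10] -> [2,4,100]
    auto reshape = std::make_shared<ov::opset1::Reshape>(
        input, ov::opset1::Constant::create(ov::element::i32, {3}, {2, 4, 100}), true);
    // Transpose rotates the feature dim to the innermost axis: [2,4,100] -> [2,100,4]
    auto transpose = std::make_shared<ov::opset1::Transpose>(
        reshape, ov::opset1::Constant::create(ov::element::i32, {3}, {0, 2, 1}));
    return std::make_shared<ov::Model>(ov::NodeVector{transpose}, ov::ParameterVector{input});
}

std::shared_ptr<ov::Model> after_graph() {
    auto input = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 4, 10, 10});
    // Transpose first, with the merged axis expanded into its two parts:
    // order (0,2,1) on rank 3 becomes (0,2,3,1) on rank 4, [2,4,10,10] -> [2,10,10,4]
    auto transpose = std::make_shared<ov::opset1::Transpose>(
        input, ov::opset1::Constant::create(ov::element::i32, {4}, {0, 2, 3, 1}));
    // The reshape is sunk after the transpose: [2,10,10,4] -> [2,100,4]
    auto reshape = std::make_shared<ov::opset1::Reshape>(
        transpose, ov::opset1::Constant::create(ov::element::i32, {3}, {2, 100, 4}), true);
    return std::make_shared<ov::Model>(ov::NodeVector{reshape}, ov::ParameterVector{input});
}
```

Both graphs compute the same result; the sunk form merely swaps the order of the two data-movement ops, which gives the GPU plugin a chance to handle the permutation without an intermediate layout reorder (the "reorder+reshape+permute pattern opt" of the title).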
Commits
2912c2a initial trial (songbell)
59a00b0 sink reshape for further optimization (songbell)
a827247 clean up code (songbell)
58aafd5 optimize code (songbell)
293a707 relaxed pattern match to allow reorder not in place (songbell)
98e39cc use random values in test (songbell)
561d1bb Update src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transf… (songbell)
b3437ba opt the recursive function for more strict pattern check (songbell)
7d27b93 Merge branch 'bell/fuse_reorder_reshape_transpose' of https://github.… (songbell)
aec66d3 Merge branch 'master' into bell/fuse_reorder_reshape_transpose (peterchen-intel)
0995188 Merge branch 'master' into bell/fuse_reorder_reshape_transpose (songbell)
b74aa66 more strict check of reshape node (songbell)
878583d Merge branch 'bell/fuse_reorder_reshape_transpose' of https://github.… (songbell)
593097f typo (songbell)
bbdba1c move to ngraph transformation (songbell)
e3ca2ae Merge branch 'master' into bell/fuse_reorder_reshape_transpose (songbell)
65e24b6 correct copyright (songbell)
7cc8153 apply review comments (songbell)
e47ed87 Merge branch 'master' into bell/fuse_reorder_reshape_transpose (songbell)
b13fff4 Merge branch 'master' into bell/fuse_reorder_reshape_transpose (songbell)
c15e88c apply review comments (songbell)
0b4682c Merge branch 'bell/fuse_reorder_reshape_transpose' of https://github.… (songbell)
7230aac Merge branch 'master' into bell/fuse_reorder_reshape_transpose (songbell)
src/plugins/intel_gpu/src/plugin/transformations/sink_reshape.cpp (165 additions, 0 deletions)
```cpp
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "sink_reshape.hpp"

#include "intel_gpu/op/convolution.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/utils/utils.hpp"

namespace ov {
namespace intel_gpu {

SinkReshape::SinkReshape() {
    using namespace ov::pass::pattern;
    using ov::pass::pattern::op::Or;
    using namespace ov::op;

    auto reshape_predicate = [](const ov::Output<ov::Node>& output) -> bool {
        auto supported_conv_act_post_ops_for_fuse = [](const std::shared_ptr<const Node>& node) -> bool {
            return ov::is_type<v0::Relu>(node) || ov::is_type<v0::Elu>(node) || ov::is_type<v0::Sigmoid>(node) ||
                   ov::is_type<v5::HSigmoid>(node) || ov::is_type<v0::Clamp>(node) || ov::is_type<v4::Swish>(node) ||
                   ov::is_type<v4::HSwish>(node) || ov::is_type<v4::Mish>(node) || ov::is_type<v5::Round>(node);
        };
        auto supported_conv_eltwise_post_ops_for_fuse = [](const std::shared_ptr<const Node>& node) -> bool {
            if (ov::is_type<v1::Add>(node) || ov::is_type<v1::Subtract>(node) || ov::is_type<v1::Multiply>(node) ||
                ov::is_type<v1::Divide>(node))
                return std::dynamic_pointer_cast<v0::Constant>(node->get_input_node_shared_ptr(1)) != nullptr;
            return ov::is_type<v0::Exp>(node);
        };
        // walk up through fusable eltwise/activation post-ops until a convolution is found
        std::function<bool(const std::shared_ptr<ov::Node>&)> is_suitable_parent;
        is_suitable_parent = [&](const std::shared_ptr<ov::Node>& node) -> bool {
            if (node->get_users().size() != 1 || node->is_dynamic())
                return false;
            if (ov::as_type_ptr<op::Convolution>(node))
                return true;
            for (size_t idx = 0; idx < node->get_input_size(); idx++) {
                auto input = node->get_input_node_shared_ptr(idx);
                if (ov::as_type_ptr<v0::Constant>(input))
                    continue;
                if (supported_conv_eltwise_post_ops_for_fuse(node) || supported_conv_act_post_ops_for_fuse(node))
                    return is_suitable_parent(input);
                return false;
            }
            return false;
        };
        // the reshape is supported in only one case: two consecutive input dims are merged into one
        auto is_suitable_reshape = [](const std::shared_ptr<ov::Node>& node) -> bool {
            if (node->is_dynamic())
                return false;
            auto& in_ps = node->get_input_partial_shape(0);
            auto& out_ps = node->get_output_partial_shape(0);
            if (in_ps.size() != out_ps.size() + 1)
                return false;
            size_t mismatch_count = 0;
            for (size_t i = 0; i < out_ps.size(); ++i) {
                if (i + mismatch_count >= in_ps.size())
                    return false;
                if (out_ps[i] != in_ps[i + mismatch_count]) {
                    mismatch_count++;
                }
            }
            return mismatch_count == 1;
        };
        const auto reshape = ov::as_type_ptr<v1::Reshape>(output.get_node_shared_ptr());
        return is_suitable_reshape(reshape) && is_suitable_parent(reshape->get_input_node_shared_ptr(0));
    };
```
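To make the reshape criterion concrete, here is a minimal standalone sketch (plain C++, no OpenVINO dependency; an illustration, not part of the PR) that mirrors the merged-dimension check above:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Standalone re-implementation of the merged-dim check: a reshape qualifies
// only if the rank drops by exactly one and there is a single mismatch,
// i.e. two consecutive input dims are merged into one output dim.
static bool is_suitable_reshape(const std::vector<size_t>& in, const std::vector<size_t>& out) {
    if (in.size() != out.size() + 1)
        return false;
    size_t mismatch_count = 0;
    for (size_t i = 0; i < out.size(); ++i) {
        if (i + mismatch_count >= in.size())
            return false;
        if (out[i] != in[i + mismatch_count])
            mismatch_count++;
    }
    return mismatch_count == 1;
}

int main() {
    std::cout << is_suitable_reshape({2, 4, 10, 10}, {2, 4, 100})     // 1: 10x10 merged into 100
              << is_suitable_reshape({2, 4, 10, 10}, {2, 2, 20})      // 0: not a single merge of adjacent dims
              << is_suitable_reshape({2, 4, 10, 10}, {2, 4, 10, 10})  // 0: rank unchanged
              << '\n';
}
```

Note that, like the lambda above, this checks only dimension equality around a single mismatch; it does not verify that the two merged dims actually multiply into the new one.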
The pattern and rewrite callback (sink_reshape.cpp, continued):

```cpp
    auto reshape_m = wrap_type<v1::Reshape>(reshape_predicate);
    auto transpose_const_m = wrap_type<v0::Constant>();
    auto transpose_m = wrap_type<v1::Transpose>({reshape_m, transpose_const_m});

    ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
        const auto& pattern_map = m.get_pattern_value_map();
        auto reshape = std::dynamic_pointer_cast<v1::Reshape>(pattern_map.at(reshape_m).get_node_shared_ptr());
        if (!reshape || transformation_callback(reshape)) {
            return false;
        }

        auto update_order = [](std::vector<uint16_t> original_order, const std::shared_ptr<v1::Reshape>& reshape_node) {
            // Example: in this sequence, the Reshape merges 2 consecutive dims into one,
            // so the order must be updated as if the permute were done before the reshape:
            //   [1,3,4,6] -> Reshape -> [1,3,24] -> permute(0,2,1) -> [1,24,3]
            // the updated order must be (0,2,3,1):
            // the dim with index=2 is split back into 2 parts, indices 2 and 3
            auto reshape_in_shape = reshape_node->get_input_partial_shape(0).to_shape();
            auto reshape_out_shape = reshape_node->get_output_partial_shape(0).to_shape();
            auto transformed_order = original_order;
            const uint16_t merge_dim_idx = [&]() {
                for (uint16_t i = 0; i < reshape_out_shape.size(); ++i) {
                    if (reshape_in_shape[i] != reshape_out_shape[i])
                        return i;
                }
                OPENVINO_THROW("reshape input and output shapes match: no merged dim found");
            }();
            auto insert_it = transformed_order.end();
            for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) {
                auto& elem = *it;
                if (elem > merge_dim_idx) {
                    elem++;
                } else if (elem == merge_dim_idx) {
                    insert_it = it + 1;
                }
            }
            transformed_order.insert(insert_it, merge_dim_idx + 1);
            return transformed_order;
        };

        // allow only transposes which rotate the feature dim to the back, i.e. to the inner-most axis
        auto check_transpose_order = [](std::vector<uint16_t>& order) -> bool {
            if (order.size() <= 2)
                return false;
            if ((int32_t)order[order.size() - 2] != (int32_t)order.size() - 1)
                return false;
            if (order[0] != 0)
                return false;
            for (int32_t i = 2; i < (int32_t)order.size(); ++i) {
                if ((int32_t)order[i - 1] != i)
                    return false;
            }
            return true;
        };

        auto transpose = std::dynamic_pointer_cast<v1::Transpose>(pattern_map.at(transpose_m).get_node_shared_ptr());
        if (pattern_map.count(transpose_const_m) > 0) {
            auto org_transpose_m = pattern_map.at(transpose_const_m).get_node_shared_ptr();
            auto org_transpose_os = transpose->get_output_shape(0);
            auto transpose_order = std::dynamic_pointer_cast<v0::Constant>(org_transpose_m);
            auto updated_order = update_order(transpose_order->cast_vector<uint16_t>(), reshape);
            if (check_transpose_order(updated_order)) {
                auto updated_transpose_order = std::make_shared<v0::Constant>(transpose_order->get_element_type(),
                                                                              ov::Shape(1, updated_order.size()),
                                                                              updated_order);
                updated_transpose_order->set_friendly_name(transpose_order->get_friendly_name() + "_updated");
                auto new_transpose =
                    std::make_shared<v1::Transpose>(reshape->input(0).get_source_output(), updated_transpose_order);
                new_transpose->set_friendly_name(transpose->get_friendly_name() + "_with_updated_order");
                copy_runtime_info(transpose, new_transpose);
                ov::replace_node(reshape, new_transpose);
                auto new_pattern_const = std::make_shared<ov::op::v0::Constant>(ov::element::i32,
                                                                                ov::Shape{org_transpose_os.size()},
                                                                                org_transpose_os);
                auto new_reshape = std::make_shared<ov::op::v1::Reshape>(new_transpose,
                                                                         new_pattern_const,
                                                                         reshape->get_special_zero());
                new_reshape->set_friendly_name(reshape->get_friendly_name() + "_sinked_after_transpose");
                copy_runtime_info(reshape, new_reshape);
                ov::replace_node(transpose, new_reshape);
            }
        }
        return true;
    };

    auto m = std::make_shared<ov::pass::pattern::Matcher>(transpose_m, "SinkReshapeIfNeeded");
    this->register_matcher(m, callback);
}

}  // namespace intel_gpu
}  // namespace ov
```
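The order-update arithmetic is the subtle part of the callback, so here is a minimal standalone sketch (again plain C++, illustration only) that reproduces update_order and check_transpose_order on the example from the source comment and on the two transpose orders exercised by the unit test:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Standalone copy of the order-update arithmetic above. merge_dim_idx is the
// first output dim that differs from the input dim, i.e. the first of the two
// merged axes; every axis index above it shifts up by one, and the second half
// of the split axis is inserted right after the first.
static std::vector<uint16_t> update_order(std::vector<uint16_t> order, uint16_t merge_dim_idx) {
    auto insert_it = order.end();
    for (auto it = order.begin(); it != order.end(); ++it) {
        if (*it > merge_dim_idx)
            (*it)++;
        else if (*it == merge_dim_idx)
            insert_it = it + 1;
    }
    order.insert(insert_it, merge_dim_idx + 1);
    return order;
}

// True only for orders of the form {0, 2, 3, ..., n-1, 1}, i.e. the feature
// dim rotated to the innermost position.
static bool check_transpose_order(const std::vector<uint16_t>& order) {
    const size_t n = order.size();
    if (n <= 2 || order[0] != 0 || order[n - 2] != n - 1)
        return false;
    for (size_t i = 2; i < n; ++i)
        if (order[i - 1] != i)
            return false;
    return true;
}

int main() {
    // Example from the source comment: [1,3,4,6] -> Reshape -> [1,3,24] -> permute(0,2,1).
    // Dim 2 of the reshape output is the merged one, so (0,2,1) becomes (0,2,3,1).
    for (auto d : update_order({0, 2, 1}, 2))
        std::cout << d << ' ';  // prints: 0 2 3 1
    std::cout << '\n';
    // Orders from the unit test: {0,2,1} is an eligible rotation, {2,1,0} is not.
    std::cout << check_transpose_order(update_order({0, 2, 1}, 2))  // 1
              << check_transpose_order(update_order({2, 1, 0}, 2))  // 0
              << '\n';
}
```

Compiling and running this prints `0 2 3 1` and `10`, matching the (0,2,3,1) example documented in the source comment.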
src/plugins/intel_gpu/src/plugin/transformations/sink_reshape.hpp (19 additions, 0 deletions)
```cpp
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace intel_gpu {

class SinkReshape : public ov::pass::MatcherPass {
public:
    OPENVINO_MATCHER_PASS_RTTI("SinkReshapeIfNeeded");
    SinkReshape();
};

}  // namespace intel_gpu
}  // namespace ov
```
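For context, SinkReshape is a standard ov::pass::MatcherPass and is applied through a pass manager; a minimal usage sketch (assuming the same include paths and pass ordering as the unit test below, where ConvertConvolutionToInternal runs first because the matcher looks for the GPU plugin's internal convolution op):

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "plugin/transformations/convert_convolution.hpp"
#include "plugin/transformations/sink_reshape.hpp"

// Sketch: apply the pass to a model, mirroring the unit test below.
void apply_sink_reshape(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::intel_gpu::ConvertConvolutionToInternal>();
    manager.register_pass<ov::intel_gpu::SinkReshape>();
    manager.run_passes(model);
}
```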
src/plugins/intel_gpu/tests/unit/transformations/sink_reshape_test.cpp (125 additions, 0 deletions)
```cpp
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>

#include <string>
#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/opsets/opset1.hpp>
#include "openvino/op/softmax.hpp"
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "plugin/transformations/sink_reshape.hpp"
#include "plugin/transformations/convert_convolution.hpp"
#include "common_test_utils/ov_test_utils.hpp"

using namespace testing;
using namespace ov::intel_gpu;

using SinkReshapeParams = std::tuple<bool,   // add eltwise
                                     bool,   // add activation
                                     bool,   // eligible rotation
                                     bool>;  // eligible reshape

class SinkReshapeTests : public TransformationTestsF, public WithParamInterface<SinkReshapeParams> {
public:
    static std::string get_test_case_name(testing::TestParamInfo<SinkReshapeParams> obj) {
        bool add_eltwise;
        bool add_activation;
        bool eligible_rotation;
        bool eligible_reshape;
        std::tie(add_eltwise, add_activation, eligible_rotation, eligible_reshape) = obj.param;

        std::ostringstream result;
        result << "add_eltwise=" << add_eltwise << "_add_activation=" << add_activation
               << "_eligible_rotation=" << eligible_rotation << "_eligible_reshape=" << eligible_reshape;
        return result.str();
    }

    static std::shared_ptr<ov::Model> init_model(const bool add_eltwise,
                                                 const bool add_activation,
                                                 const bool eligible_rotation,
                                                 const bool eligible_reshape,
                                                 const bool ref) {
        ov::Strides strides{1, 1};
        ov::Strides dilations{1, 1};
        ov::CoordinateDiff pads_begin{0, 0};
        ov::CoordinateDiff pads_end{0, 0};
        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{2, 3, 12, 12});
        auto weights_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 3, 3, 3}, {1});
        auto conv = std::make_shared<ov::op::v1::Convolution>(input,
                                                              weights_const,
                                                              strides,
                                                              pads_begin,
                                                              pads_end,
                                                              dilations,
                                                              ov::op::PadType::EXPLICIT);
        std::shared_ptr<ov::Node> reshape_input_node = conv;
        if (add_eltwise) {
            auto sub_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1}, {1});
            reshape_input_node = std::make_shared<ov::opset1::Subtract>(reshape_input_node, sub_const);
        }

        if (add_activation) {
            reshape_input_node = std::make_shared<ov::opset1::Sigmoid>(reshape_input_node);
        }
        std::shared_ptr<ov::Model> model = nullptr;
        if (!ref) {
            auto shape = eligible_reshape ? std::vector<int>{2, 4, 100} : std::vector<int>{2, 2, 20};
            auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, shape);
            auto reshape = std::make_shared<ov::opset1::Reshape>(reshape_input_node, reshape_const, true);
            auto order = eligible_rotation ? std::vector<int>{0, 2, 1} : std::vector<int>{2, 1, 0};
            auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, order);
            auto transpose = std::make_shared<ov::opset1::Transpose>(reshape, transpose_const);

            auto softmax = std::make_shared<ov::op::v8::Softmax>(transpose);
            model = std::make_shared<ov::Model>(ov::NodeVector{softmax}, ov::ParameterVector{input});
        } else {
            auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {4}, {0, 2, 3, 1});
            auto transpose = std::make_shared<ov::opset1::Transpose>(reshape_input_node, transpose_const);
            auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {3}, {2, 100, 4});
            auto reshape = std::make_shared<ov::opset1::Reshape>(transpose, reshape_const, true);
            auto softmax = std::make_shared<ov::op::v8::Softmax>(reshape);
            model = std::make_shared<ov::Model>(ov::NodeVector{softmax}, ov::ParameterVector{input});
        }
        ov::pass::Manager manager;
        manager.register_pass<ConvertConvolutionToInternal>();
        if (!ref)
            manager.register_pass<SinkReshape>();
        manager.run_passes(model);
        return model;
    }

protected:
    void SetUp() override {
        TransformationTestsF::SetUp();
        bool add_eltwise;
        bool add_activation;
        bool eligible_rotation;
        bool eligible_reshape;
        std::tie(add_eltwise, add_activation, eligible_rotation, eligible_reshape) = this->GetParam();

        model = init_model(add_eltwise, add_activation, eligible_rotation, eligible_reshape, true);
        if (!eligible_rotation || !eligible_reshape)
            model_ref = model->clone();
        else
            model_ref = init_model(add_eltwise, add_activation, eligible_rotation, eligible_reshape, false);
    }
};

TEST_P(SinkReshapeTests, CompareFunctions) {}

const std::vector<bool> add_eltwise = {false, true};
const std::vector<bool> add_activation = {false, true};
const std::vector<bool> eligible_rotation = {false, true};
const std::vector<bool> eligible_reshape = {false, true};

INSTANTIATE_TEST_SUITE_P(smoke_TransformationTests_reshape_transpose,
                         SinkReshapeTests,
                         ::testing::Combine(::testing::ValuesIn(add_eltwise),
                                            ::testing::ValuesIn(add_activation),
                                            ::testing::ValuesIn(eligible_rotation),
                                            ::testing::ValuesIn(eligible_reshape)),
                         SinkReshapeTests::get_test_case_name);
```