optimize code
Signed-off-by: fishbell <[email protected]>
songbell committed Dec 24, 2024
1 parent a827247 commit 8362209
Showing 3 changed files with 95 additions and 91 deletions.
@@ -137,6 +137,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) {

program_helpers::do_for_types<reorder>(*node, [&p](reorder_node& node) {
auto& input = node.input();
+
// Restrictions:
// - inputs cannot be padded
// - primitives input cannot be output
@@ -16,7 +16,8 @@ void reshape_transfer::run(program& p) {
// (reorder) + reshape + transpose
// sink reshape for further possible optimization
auto is_suitable_permute = [](cldnn::program_node* node) {
-        return node->get_users().size() == 1 && node->is_dynamic() == false;
+        return node->get_users().size() == 1 && node->is_dynamic() == false &&
+               node->get_output_layout().get_rank() == 4;
};

auto is_suitable_reshape = [](cldnn::program_node* node) -> bool {
@@ -28,8 +29,8 @@ void reshape_transfer::run(program& p) {
return true;
return false;
};
-    std::function<bool(cldnn::program_node*)> is_suitable_reorder;

+    std::function<bool(cldnn::program_node*)> is_suitable_reorder;
is_suitable_reorder = [&is_suitable_reorder](const cldnn::program_node* node) -> bool {
if (node->get_users().size() != 1 || node->is_dynamic())
return false;
@@ -58,51 +59,32 @@ void reshape_transfer::run(program& p) {
        // updated order must be (0,2,3,1):
        // dim with index=2 is split into 2 parts: 2 and 3
        const auto& reshape_in_shape = reshape->get_input_layout().get_dims();
-        const auto& reshape_out_dim = reshape->get_output_layout().get_dims();
-        auto reshape_out_shape = reshape_out_dim;
+        const auto& reshape_out_shape = reshape->get_output_layout().get_dims();
        auto transformed_order = original_order;
        ov::Shape new_shape(transformed_order.size());
-        if (original_order.size() < reshape_out_dim.size() && reshape_out_dim.size() == 4) {
-            // if order dims is less than reshape dims, means reshape shape has been converted to upper dims some time
-            // before merge spatial dims
-            reshape_out_shape.resize(original_order.size());
-            for (size_t i = 0; i < reshape_out_dim.size(); ++i) {
-                if (i < 2) {
-                    reshape_out_shape[i] = reshape_out_dim[i];
-                } else {
-                    reshape_out_shape[2] *= reshape_out_dim[i];
-                }
-            }
-            const size_t merge_dim_idx = [&]() {
-                for (size_t i = 0; i < reshape_in_shape.size(); ++i) {
-                    if (reshape_in_shape[i] != reshape_out_shape[i])
-                        return i;
-                }
-                OPENVINO_THROW("merged_dim_idx can not be found");
-            }();
-            auto insertIt = transformed_order.end();
-            for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) {
-                auto& elem = *it;
-                if (elem > merge_dim_idx) {
-                    elem++;
-                } else if (elem == merge_dim_idx) {
-                    insertIt = it + 1;
-                }
-            }
-            transformed_order.insert(insertIt, merge_dim_idx + 1);
-        } else {
-            auto reorder_orders = [](std::vector<uint16_t>& order, std::vector<uint16_t> place_order) {
-                // for all elements to put in place
-                for (size_t i = 0; i < order.size() - 1; ++i) {
-                    while (i != place_order[i]) {
-                        // swap it with the element at its final place
-                        auto alt = place_order[i];
-                        std::swap(order[i], order[alt]);
-                        std::swap(place_order[i], place_order[alt]);
-                    }
-                }
-            };
-            reorder_orders(transformed_order, std::vector<uint16_t>({0, 1, 3, 2}));
-        }
+        const size_t merge_dim_idx = [&]() {
+            for (size_t i = 0; i < reshape_in_shape.size(); ++i) {
+                if (reshape_in_shape[i] != reshape_out_shape[i])
+                    return i;
+            }
+            OPENVINO_THROW("same input/output for reshape node");
+        }();
+        auto insertIt = transformed_order.end();
+        for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) {
+            auto& elem = *it;
+            if (elem > merge_dim_idx) {
+                elem++;
+            } else if (elem == merge_dim_idx) {
+                insertIt = it + 1;
+            }
+        }
+        transformed_order.insert(insertIt, merge_dim_idx + 1);
+        // remove invalid orders
+        if (transformed_order.size() > reshape_out_shape.size()) {
+            transformed_order.erase(
+                std::remove_if(transformed_order.begin(), transformed_order.end(), [&](uint16_t& order) {
+                    return order >= reshape_out_shape.size();
+                }));
+        }
        return transformed_order;
    };
@@ -136,20 +118,24 @@ void reshape_transfer::run(program& p) {
            reshape_node = &(inter_node->as<reshape>());

            auto transpose_order = update_order(transpose_node.get_permute_order(), reshape_node);
-            auto next_node = transpose_node.get_users().front();
-            auto new_reshape_tensor = transpose_node.get_output_layout().get_tensor();
-            p.move_node(*reshape_node, *node, *next_node);
-            // replace the permute node and reshape node
            auto new_permute =
                std::make_shared<permute>(transpose_node.id() + "_reordered", parent_node->id(), transpose_order);
            auto& new_permute_node = p.get_or_create(new_permute);
-            auto new_reshape =
-                std::make_shared<reshape>(reshape_node->id() + "_sinked", new_permute_node.id(), new_reshape_tensor);
-            auto& new_reshape_node = p.get_or_create(new_reshape);
+            if (new_permute_node.as<permute>().is_rotating_except_batch()) {
+                auto next_node = transpose_node.get_users().front();
+                auto new_reshape_tensor = transpose_node.get_output_layout().get_tensor();
+                p.move_node(*reshape_node, *node, *next_node);
+                // replace the permute node and reshape node
+                auto new_reshape =
+                    std::make_shared<reshape>(reshape_node->id() + "_sinked", new_permute_node.id(), new_reshape_tensor);
+                auto& new_reshape_node = p.get_or_create(new_reshape);

-            p.replace(transpose_node, new_permute_node);
-            p.replace(*reshape_node, new_reshape_node);
-            new_permute_node.recalc_output_layout(false);
-            new_reshape_node.recalc_output_layout(false);
+                p.replace(transpose_node, new_permute_node);
+                p.replace(*reshape_node, new_reshape_node);
+                new_permute_node.recalc_output_layout(false);
+                new_reshape_node.recalc_output_layout(false);
+            } else {
+                p.remove_if_dangling(new_permute_node);
+            }
        }
    }
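The correctness of this sinking hinges on a simple equivalence: merging two adjacent dims and then transposing yields the same data as transposing with the updated order and then merging. Below is a self-contained check under the same example shapes (assumptions: plain row-major int buffers instead of cldnn tensors; strides_of and permute_data are illustrative helpers, not plugin API):

// Illustrative check that reshape-then-transpose == transpose-then-reshape
// once the permute order has been updated; not plugin code.
#include <cassert>
#include <cstddef>
#include <numeric>
#include <vector>

// Row-major strides for a shape.
static std::vector<size_t> strides_of(const std::vector<size_t>& shape) {
    std::vector<size_t> s(shape.size(), 1);
    for (size_t i = shape.size() - 1; i-- > 0;)
        s[i] = s[i + 1] * shape[i + 1];
    return s;
}

// Generic permute of a flat row-major buffer viewed with the given shape.
static std::vector<int> permute_data(const std::vector<int>& src,
                                     const std::vector<size_t>& shape,
                                     const std::vector<size_t>& order) {
    std::vector<size_t> out_shape(order.size());
    for (size_t i = 0; i < order.size(); ++i)
        out_shape[i] = shape[order[i]];
    const auto in_strides = strides_of(shape);
    const auto out_strides = strides_of(out_shape);
    std::vector<int> dst(src.size());
    for (size_t lin = 0; lin < src.size(); ++lin) {
        // Decompose the output linear index into coordinates and map each
        // output axis d back to input axis order[d].
        size_t rem = lin, in_off = 0;
        for (size_t d = 0; d < out_shape.size(); ++d) {
            in_off += (rem / out_strides[d]) * in_strides[order[d]];
            rem %= out_strides[d];
        }
        dst[lin] = src[in_off];
    }
    return dst;
}

int main() {
    // A [1, 3, 4, 6] tensor; a row-major reshape is a no-op on the flat buffer.
    std::vector<int> t(1 * 3 * 4 * 6);
    std::iota(t.begin(), t.end(), 0);
    // Path A: reshape to [1, 3, 24], then permute {0, 2, 1}  -> [1, 24, 3].
    const auto a = permute_data(t, {1, 3, 24}, {0, 2, 1});
    // Path B: permute {0, 2, 3, 1} -> [1, 4, 6, 3], then reshape to [1, 24, 3].
    const auto b = permute_data(t, {1, 3, 4, 6}, {0, 2, 3, 1});
    assert(a == b);  // sinking the reshape below the permute preserves the data
    return 0;
}

Since both paths produce the same flat buffer, the pass can replace the transpose with the updated permute and let the sinked reshape take over the transpose's output tensor, exactly as the block above does.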
91 changes: 54 additions & 37 deletions src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp
@@ -12,7 +12,7 @@
using namespace cldnn;
using namespace ::tests;

-TEST(merge_reorder_reshape_permute, no_reshape) {
+TEST(opt_reorder_reshape_permute, no_reshape) {
auto& engine = get_test_engine();
auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx};
auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx});
@@ -60,7 +60,6 @@ TEST(merge_reorder_reshape_permute, no_reshape) {

ref_network.set_input_data("input", input);
auto ref_output = ref_network.execute();
-
auto ref_out_mem = ref_output.at("softmax").get_memory();
mem_lock<ov::float16> lock_ref(ref_out_mem, get_test_stream());
for (size_t i = 0; i < out_mem->count(); i++) {
@@ -69,33 +68,25 @@ TEST(merge_reorder_reshape_permute, no_reshape) {
}
}

-// output in byxf layout, check further....
-/*TEST(merge_reorder_reshape_permute, no_reorder) {
+TEST(opt_reorder_reshape_permute, no_reorder_no_reshape) {
    auto& engine = get_test_engine();
    auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx};
    auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx});
    auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx});
-    set_values<ov::float16>(input, { 2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f,
-                                     1.f, 2.f, 3.f, 1.f, 2.f, 4.f,
-                                     5.f, 1.f, 1.f, 2.f, 1.f, 2.f,
-                                     2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f,
-                                     3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f,
-                                     2.f, 4.f, 1.f, 1.f, 2.f, 1.f,
-                                     1.f, 2.f, 0.f, 2.f, 5.f, 2.f,
-                                     4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f});
-    set_values<ov::float16>(weight, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f});
+    set_values<ov::float16>(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f,
+                                    5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f,
+                                    3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f,
+                                    1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f});
+    set_values<ov::float16>(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f});
    topology topology;
    topology.add(input_layout("input", in_layout));
    topology.add(data("weight", weight));
-    topology.add(convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0},
-false)); topology.add(reshape("reshape_inter", input_info("convolution"), false, {1, 3, 24}, ov::PartialShape{1, 3,
-24})); topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); topology.add(softmax("softmax",
-input_info("permute_inter"), 1)); ExecutionConfig config_test = get_test_default_config(engine);
-    ov::intel_gpu::ImplementationDesc softmax_impl_test = { format::bfyx, "", impl_types::ocl };
-    config_test.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "softmax_inter",
-softmax_impl_test } })); ExecutionConfig config = get_test_default_config(engine);
+    topology.add(
+        convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
+    topology.add(permute("permute_inter", input_info("convolution"), {0, 2, 3, 1}));
+    topology.add(softmax("softmax", input_info("permute_inter"), 1));
+    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    config.set_property(ov::intel_gpu::optimize_data(true));
    auto prog = program::build_program(engine, topology, config);
@@ -104,18 +95,27 @@

net.set_input_data("input", input);
auto output = net.execute();

ExecutionConfig ref_config = get_test_default_config(engine);
ref_config.set_property(ov::intel_gpu::optimize_data(false));
cldnn::network ref_network(engine, topology, ref_config);
// select preferred formats, conv + permute
auto permute_inst = net.get_primitive("permute_inter");
-    //ASSERT_TRUE(permute_inst->can_be_optimized());
+    ASSERT_TRUE(permute_inst->can_be_optimized());
auto out_mem = output.at("softmax").get_memory();
mem_lock<ov::float16> lock(out_mem, get_test_stream());

ref_network.set_input_data("input", input);
auto ref_output = ref_network.execute();
auto ref_out_mem = ref_output.at("softmax").get_memory();
mem_lock<ov::float16> lock_ref(ref_out_mem, get_test_stream());
for (size_t i = 0; i < out_mem->count(); i++) {
float actual = lock[i];
-        ASSERT_EQ(actual, ref_output[i]);
+        ASSERT_EQ(actual, lock_ref[i]);
}
-}*/
+}

-TEST(merge_reorder_reshape_permute, no_reorder_no_reshape) {
+TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike) {
auto& engine = get_test_engine();
auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx};
auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx});
@@ -131,24 +131,42 @@ TEST(merge_reorder_reshape_permute, no_reorder_no_reshape) {
topology.add(data("weight", weight));
topology.add(
convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
topology.add(permute("permute_inter", input_info("convolution"), {0, 2, 3, 1}));
topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16));
topology.add(
reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1}));
topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1}));
topology.add(softmax("softmax", input_info("permute_inter"), 1));
ExecutionConfig config = get_test_default_config(engine);
-    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(false));
config.set_property(ov::intel_gpu::optimize_data(true));
auto prog = program::build_program(engine, topology, config);

network net(prog);

-    net.set_input_data("input", input);
-    auto output = net.execute();
-
ExecutionConfig ref_config = get_test_default_config(engine);
ref_config.set_property(ov::intel_gpu::optimize_data(false));
cldnn::network ref_network(engine, topology, ref_config);
// select preferred formats, conv + permute

+    net.set_input_data("input", input);
+    auto output = net.execute();
+    auto optimzed_nodes = net.get_program()->get_optimized();
+    auto it =
+        std::find_if(std::begin(optimzed_nodes), std::end(optimzed_nodes), [&](cldnn::program::optimized_info& oi) {
+            return oi.first == "reorder_inter";
+        });
+    ASSERT_NE(it, optimzed_nodes.end());
    auto permute_inst = net.get_primitive("permute_inter");
    ASSERT_TRUE(permute_inst->can_be_optimized());
+    auto reshape_inst = net.get_primitive("reshape_inter");
+    ASSERT_TRUE(reshape_inst->can_be_optimized());
+
+    auto& processing_order = prog->get_processing_order();
+
+    auto reshape_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("reshape_inter"));
+    size_t reshape_dist = std::distance(processing_order.begin(), reshape_node);
+
+    auto permute_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("permute_inter"));
+    size_t permute_dist = std::distance(processing_order.begin(), permute_node);
+    ASSERT_TRUE(reshape_dist > permute_dist);
auto out_mem = output.at("softmax").get_memory();
mem_lock<ov::float16> lock(out_mem, get_test_stream());

@@ -163,7 +181,7 @@ TEST(merge_reorder_reshape_permute, no_reorder_no_reshape) {
}
}

-TEST(merge_reorder_reshape_permute, cutomized_net_yolov6_alike) {
+TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike_4d) {
auto& engine = get_test_engine();
auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx};
auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx});
@@ -182,7 +200,7 @@ TEST(merge_reorder_reshape_permute, cutomized_net_yolov6_alike) {
topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16));
topology.add(
reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1}));
-    topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1}));
+    topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1, 3}));
topology.add(softmax("softmax", input_info("permute_inter"), 1));
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::allow_new_shape_infer(false));
@@ -225,12 +243,11 @@ TEST(merge_reorder_reshape_permute, cutomized_net_yolov6_alike) {
mem_lock<ov::float16> lock_ref(ref_out_mem, get_test_stream());
for (size_t i = 0; i < out_mem->count(); i++) {
float actual = lock[i];
-        std::cout << actual << ", " << std::endl;
ASSERT_EQ(actual, lock_ref[i]);
}
}

-TEST(merge_reorder_reshape_permute, not_sinking_reshape) {
+TEST(opt_reorder_reshape_permute, not_sinking_reshape) {
auto& engine = get_test_engine();
auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx};
auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx});
@@ -248,7 +265,7 @@ TEST(merge_reorder_reshape_permute, not_sinking_reshape) {
convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16));
topology.add(
reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 2, 1}, ov::PartialShape{1, 3, 2, 1}));
reshape("reshape_inter", input_info("reorder_inter"), false, {1, 18, 4, 1}, ov::PartialShape{1, 18, 4, 1}));
topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1}));
topology.add(softmax("softmax", input_info("permute_inter"), 1));
ExecutionConfig config = get_test_default_config(engine);
