From 67d8cc8d31a305811c989278bc4c5a655a18b88d Mon Sep 17 00:00:00 2001
From: wilson-seok
Date: Thu, 2 Jan 2025 22:32:50 +0000
Subject: [PATCH 1/2] add check of multiple axis broadcasting in is_valid_fusion()

---
 src/plugins/intel_gpu/src/graph/primitive_inst.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 0737362405ff9c..85277910004407 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -2658,6 +2658,19 @@ bool primitive_inst::is_valid_fusion() const {
         if (fd.is_type<eltwise>())
             can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape, outer_dep_pshape, fd.typed_desc<eltwise>()->broadcast_spec);
 
+        // Check if broadcast happens more than single axis.
+        // Current FUSED_OP_LOAD macro cannot support broadcast on dynamic dimension.
+        if (can_broadcast == true && (merged_shape.is_static() && outer_dep_pshape.is_static()) &&
+            outer_dep.first->_is_dynamic == true && merged_shape.rank().get_length() == outer_dep_pshape.rank().get_length()) {
+            uint8_t broadcast_more_than_single_axis = 0;
+            for (int64_t i = 0; i < merged_shape.rank().get_length(); i++) {
+                if (merged_shape.get_shape().at(i) != outer_dep_pshape.get_shape().at(i))
+                    broadcast_more_than_single_axis++;
+            }
+            if (broadcast_more_than_single_axis > 1)
+                can_broadcast = false;
+        }
+
 #ifdef ENABLE_ONEDNN_FOR_GPU
         // WA for OneDNN binary add fusions: we need to broadcast batch dimension to avoid situation with
         // batch dimension mismatch in OneDNN tensor descriptors as follow:

From bbf6c498f3351df64a27ecebeb03e279112faee7 Mon Sep 17 00:00:00 2001
From: wilson-seok
Date: Fri, 3 Jan 2025 15:16:40 +0000
Subject: [PATCH 2/2] update condition and add func test

---
 .../intel_gpu/src/graph/primitive_inst.cpp    |  5 +-
 .../dynamic/dynamic_unfusion.cpp              | 95 +++++++++++++++++++
 2 files changed, 97 insertions(+), 3 deletions(-)
 create mode 100644 src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_unfusion.cpp

diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 85277910004407..8a4d7071bf0f8d 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -2659,9 +2659,8 @@ bool primitive_inst::is_valid_fusion() const {
             can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape, outer_dep_pshape, fd.typed_desc<eltwise>()->broadcast_spec);
 
         // Check if broadcast happens more than single axis.
-        // Current FUSED_OP_LOAD macro cannot support broadcast on dynamic dimension.
-        if (can_broadcast == true && (merged_shape.is_static() && outer_dep_pshape.is_static()) &&
-            outer_dep.first->_is_dynamic == true && merged_shape.rank().get_length() == outer_dep_pshape.rank().get_length()) {
+        // Current gemm_tiled_opt kernel FUSED_OP_LOAD macro cannot support broadcast on dynamic dimension.
+        if (_node->is_type<gemm>() && can_broadcast == true && merged_shape.rank().get_length() == outer_dep_pshape.rank().get_length()) {
             uint8_t broadcast_more_than_single_axis = 0;
             for (int64_t i = 0; i < merged_shape.rank().get_length(); i++) {
                 if (merged_shape.get_shape().at(i) != outer_dep_pshape.get_shape().at(i))
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_unfusion.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_unfusion.cpp
new file mode 100644
index 00000000000000..1cc079a10b82f6
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_unfusion.cpp
@@ -0,0 +1,95 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "common_test_utils/file_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/matmul.hpp"
+#include "openvino/op/multiply.hpp"
+
+namespace {
+
+using ov::test::InputShape;
+
+using DynamicUnfusionsParams = std::tuple<std::vector<InputShape>,  // input shapes
+                                          ov::element::Type>;       // input precision
+
+class DynamicUnfusions : public testing::WithParamInterface<DynamicUnfusionsParams>,
+                         virtual public ov::test::SubgraphBaseTest {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<DynamicUnfusionsParams> obj) {
+        std::vector<InputShape> input_shapes;
+        ov::element::Type input_precision;
+
+        std::tie(input_shapes, input_precision) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=(";
+        for (const auto& shape : input_shapes) {
+            result << ov::test::utils::partialShape2str({shape.first}) << "_";
+        }
+        result << ")_TS=";
+        for (const auto& shape : input_shapes) {
+            result << "(";
+            if (!shape.second.empty()) {
+                auto itr = shape.second.begin();
+                do {
+                    result << ov::test::utils::vec2str(*itr);
+                } while (++itr != shape.second.end() && result << "_");
+            }
+            result << ")_";
+        }
+        result << "input_precision=" << input_precision;
+        return result.str();
+    }
+
+protected:
+    std::shared_ptr<ov::Model> init_subgraph(std::vector<ov::PartialShape>& input_shapes,
+                                             const ov::element::Type input_precision) {
+        auto input0 = std::make_shared<ov::op::v0::Parameter>(input_precision, input_shapes[0]);
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(input_precision, input_shapes[1]);
+        auto input2 = std::make_shared<ov::op::v0::Parameter>(input_precision, input_shapes[2]);
+
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(input0, input1);
+        auto mul = std::make_shared<ov::op::v1::Multiply>(matmul, input2);
+
+        matmul->set_friendly_name("MatMul");
+        mul->set_friendly_name("Multiply");
+
+        return std::make_shared<ov::Model>(ov::NodeVector{mul}, ov::ParameterVector{input0, input1, input2}, "DynamicUnfusions");
+    }
+
+    void SetUp() override {
+        targetDevice = ov::test::utils::DEVICE_GPU;
+
+        std::vector<InputShape> input_shapes;
+        ov::element::Type input_precision;
+
+        std::tie(input_shapes, input_precision) = GetParam();
+
+        init_input_shapes(input_shapes);
+
+        inType = outType = input_precision;
+        function = init_subgraph(inputDynamicShapes, input_precision);
+    }
+};
+
+TEST_P(DynamicUnfusions, Inference) {
+    run();
+}
+
+const std::vector<ov::element::Type> input_precisions = {ov::element::f32};
+
+const std::vector<std::vector<InputShape>> input_shapes_dyn = {
+    {{{1024, -1}, {{1024, 1024}}}, {{-1, 1024}, {{1024, 1024}}}, {{1, -1}, {{1, 1}}}},
+};
+
+INSTANTIATE_TEST_SUITE_P(DynamicUnfusions_basic,
+                         DynamicUnfusions,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_dyn),
+                                            ::testing::ValuesIn(input_precisions)),
+                         DynamicUnfusions::getTestCaseName);
+} // namespace
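For illustration only, not part of either patch: the functional test above multiplies a 1024x1024 MatMul output by a 1x1 input, which must broadcast on both axes, so the new check in is_valid_fusion() drops the eltwise fusion and the Multiply runs unfused. The standalone C++ sketch below mirrors the axis-counting logic the patch adds; the helper name broadcasts_on_single_axis_only and the plain std::vector shapes are illustrative stand-ins for the ov::PartialShape-based code in the patch.

// Minimal standalone sketch of the multi-axis broadcast check (assumes both
// shapes are static and of equal rank, as in the guarded branch of the patch).
#include <cstdint>
#include <iostream>
#include <vector>

bool broadcasts_on_single_axis_only(const std::vector<size_t>& merged_shape,
                                    const std::vector<size_t>& outer_dep_shape) {
    uint8_t differing_axes = 0;
    for (size_t i = 0; i < merged_shape.size(); i++) {
        // Each axis where the fused input differs from the merged output shape
        // is an axis that would have to be broadcast by the fused kernel.
        if (merged_shape[i] != outer_dep_shape[i])
            differing_axes++;
    }
    return differing_axes <= 1;
}

int main() {
    // MatMul output 1024x1024 fused with a 1x1 eltwise input: two axes broadcast,
    // so the new check would set can_broadcast = false and reject the fusion.
    std::cout << broadcasts_on_single_axis_only({1024, 1024}, {1, 1}) << "\n";     // 0
    // A 1x1024 input broadcasts on a single axis only and stays fusible.
    std::cout << broadcasts_on_single_axis_only({1024, 1024}, {1, 1024}) << "\n";  // 1
    return 0;
}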