
Commit

review fixes
dmitry-gorokhov committed Dec 18, 2024
1 parent 2476be5 commit 51b67ca
Showing 8 changed files with 105 additions and 95 deletions.
@@ -155,7 +155,7 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const MatMulAttrs& attrs,

const auto maxRank =
std::max({srcDesc->getShape().getRank(), weiDesc->getShape().getRank(), dstDesc->getShape().getRank()});
auto normWeiDims = normalizeToRank(weiDesc->getShape().getStaticDims(), maxRank);
const auto normWeiDims = normalizeToRank(weiDesc->getShape().getStaticDims(), maxRank);
if (memory.count(ARG_WEI | ARG_ATTR_SCALES)) {
auto dstPrc = ov::element::f32;
dnnlpoc.appendDecompressionScales(memory.at(ARG_WEI | ARG_ATTR_SCALES),
36 changes: 36 additions & 0 deletions src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -41,6 +41,42 @@ namespace ov {
namespace intel_cpu {
namespace node {

ov::element::TypeVector FullyConnected::getSupportedCompressedWeightsTypes() {
using ov::element::Type_t;

bool useMatmulPrim = false;
CPU_DEBUG_CAP_ENABLE(useMatmulPrim = getEnvBool("OV_CPU_ENABLE_DNNL_MAMTUL_FOR_FC");)

if (useMatmulPrim) {
return {Type_t::u8, Type_t::i8};
} else {
#if defined(OPENVINO_ARCH_X86_64)
return {Type_t::u8, Type_t::i8, Type_t::u4, Type_t::i4, Type_t::nf4, Type_t::f4e2m1};
#else
return {};
#endif
}
}

ov::element::TypeVector FullyConnected::getSupportedCompressedActivationsTypes() {
using ov::element::Type_t;

bool useMatmulPrim = false;
CPU_DEBUG_CAP_ENABLE(useMatmulPrim = getEnvBool("OV_CPU_ENABLE_DNNL_MAMTUL_FOR_FC");)

if (useMatmulPrim) {
return {Type_t::f32, Type_t::f16};
} else {
#if defined(OPENVINO_ARCH_X86_64)
// @todo enable for bf16 as well
// after EnforceInferencePrecision is replaced with ConvertPrecision
return {Type_t::f32};
#else
return {};
#endif
}
}
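
For orientation (not part of the diff), a minimal sketch of how code elsewhere in the plugin might consume the new static helpers; the wrapper name `is_compressed_weights_type_supported` is hypothetical and used only for illustration:

```cpp
#include <algorithm>

#include "nodes/fullyconnected.h"  // within the intel_cpu plugin source tree

// Hypothetical wrapper (illustration only): checks whether a weights element
// type is in the platform-dependent list returned by the new static helper.
static bool is_compressed_weights_type_supported(const ov::element::Type& type) {
    const auto supported =
        ov::intel_cpu::node::FullyConnected::getSupportedCompressedWeightsTypes();
    return std::find(supported.begin(), supported.end(), type) != supported.end();
}
```

Centralizing the lists in these helpers keeps the x64-only 4-bit types and the OV_CPU_ENABLE_DNNL_MAMTUL_FOR_FC debug fallback consistent across the transformation-pipeline call sites updated later in this commit.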

bool FullyConnected::isSupportedOperation(const std::shared_ptr<const ov::Node>& op,
std::string& errorMessage) noexcept {
try {
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/nodes/fullyconnected.h
@@ -72,6 +72,8 @@ class FullyConnected : public Node {
size_t OC,
size_t G,
ov::element::Type inferencePrecision) noexcept;
static ov::element::TypeVector getSupportedCompressedWeightsTypes();
static ov::element::TypeVector getSupportedCompressedActivationsTypes();

bool isExecutable() const override {
return !isInputTensorAtPortEmpty(0);
@@ -28,8 +28,6 @@ namespace ov {
namespace intel_cpu {

inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model>& model, const Config& config) {
using ov::element::Type_t;

RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);

ov::pass::Manager manager("CPU:ConvertToCPUSpecificOpset");
@@ -38,28 +36,11 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model>& model, const Config& config) {
CPU_REGISTER_PASS_COMMON(manager, ConvertMatMulToFC);
CPU_REGISTER_PASS_COMMON(manager, FullyConnectedBiasFusion);

std::vector<ov::element::Type> supported_activation_types;
std::vector<ov::element::Type> supported_compressed_weights_types;

bool useMatmulPrim = false;
CPU_DEBUG_CAP_ENABLE(useMatmulPrim = getEnvBool("OV_CPU_ENABLE_DNNL_MAMTUL_FOR_FC");)

if (useMatmulPrim) {
supported_activation_types = {Type_t::f32, Type_t::f16};
supported_compressed_weights_types = {Type_t::u8, Type_t::i8};
} else {
// @todo enable for bf16 as well
// after EnforceInferencePrecision is replaced with ConvertPrecision
supported_activation_types = {Type_t::f32};
supported_compressed_weights_types =
{Type_t::u8, Type_t::i8, Type_t::u4, Type_t::i4, Type_t::nf4, Type_t::f4e2m1};
}

CPU_REGISTER_PASS_COMMON(
manager,
pass::ConvertFullyConnectedToFullyConnectedCompressed,
supported_activation_types,
supported_compressed_weights_types,
ov::intel_cpu::node::FullyConnected::getSupportedCompressedActivationsTypes(),
ov::intel_cpu::node::FullyConnected::getSupportedCompressedWeightsTypes(),
[&config](const std::shared_ptr<ov::op::internal::FullyConnected>& fc, size_t IC, size_t OC, size_t G) {
return ov::intel_cpu::node::FullyConnected::isSupportedCompressedOperation(fc,
IC,
@@ -338,23 +338,12 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecisions) {
// We need to fuse Transpose to MatMul to have a simpler callback for the next transformation
CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::TransposeMatMul);
CPU_REGISTER_PASS_ARM(decompression_handling_manager, ov::pass::TransposeMatMul);
ov::element::TypeVector decompression_precisions{ov::element::u8,
ov::element::i8,
ov::element::u4,
ov::element::i4,
ov::element::nf4,
ov::element::f4e2m1};

CPU_REGISTER_PASS_X64(decompression_handling_manager,
ov::pass::MarkDequantization,
decompression_precisions,
false,
true);
CPU_REGISTER_PASS_ARM(decompression_handling_manager,
ov::pass::MarkDequantizationSubgraph,
decompression_precisions,
false,
true);
const auto& decompression_precisions = ov::intel_cpu::node::FullyConnected::getSupportedCompressedWeightsTypes();
CPU_REGISTER_PASS_COMMON(decompression_handling_manager,
ov::pass::MarkDequantization,
decompression_precisions,
false,
true);
CPU_SET_CALLBACK_COMMON(
decompression_handling_manager,
[&](const_node_ptr& node) -> bool {
@@ -2,10 +2,10 @@
// SPDX-License-Identifier: Apache-2.0
//

#ifdef CPU_DEBUG_CAPS

#include "custom/subgraph_tests/src/classes/matmul_weights_decompression.hpp"

#include "openvino/util/env_util.hpp"

using namespace CPUTestUtils;

namespace ov {
@@ -20,14 +20,22 @@ std::vector<ov::AnyMap> filter_additional_config_basic() {
const std::vector<ov::test::ElementType> decompression_precisions = {ov::element::f32};
const std::vector<ov::test::ElementType> weights_precisions = {ov::element::u8, ov::element::i8};

bool should_use_decompression_impl() {
#ifdef CPU_DEBUG_CAPS
return ov::util::getenv_bool("OV_CPU_ENABLE_DNNL_MAMTUL_FOR_FC");
#else
return false;
#endif
}

const std::vector<MatMulDecompressionShapeParams> input_shapes = {
{{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}},
{{{}, {{1, 8, 16}}}, {16, 32}, 4ul},
{{{}, {{1, 4, 16}}}, {1, 16, 32}},
{{{}, {{5, 40, 496}}}, {1, 496, 240}},
{{{}, {{5, 40, 96}}}, {1, 96, 240}},
{{{}, {{1, 4, 48}}}, {48, 256}},
{{{}, {{1, 11, 154}}}, {154, 77}, 154ul},
{{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {1, 480, 256}},
{{{}, {{1, 11, 104}}}, {104, 77}, 104ul},
{{{-1, -1, -1}, {{10, 40, 110}, {11, 40, 110}}}, {1, 110, 256}},
};
const std::vector<fusingSpecificParams> fusing_params{emptyFusingSpec, fusingBias};

@@ -42,10 +50,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights,
::testing::Values(false),
::testing::ValuesIn(filter_additional_config_basic()),
::testing::ValuesIn(fusing_params),
::testing::Values(true)),
::testing::Values(should_use_decompression_impl())),
MatmulWeightsDecompression::getTestCaseName);


const std::vector<MatMulDecompressionShapeParams> input_shapes_corner_cases = {
{{{-1, -1, -1}, {{1, 4, 16}}}, {1, 16, 32}},
{{{-1, -1, -1}, {{1, 4, 16}}}, {16, 32}},
@@ -54,8 +61,9 @@ const std::vector<MatMulDecompressionShapeParams> input_shapes_corner_cases = {
};

const std::vector<bool> transpose_weights = {true, false};
const std::vector<DecompressionSubtractType> decompression_subtract_type = {
DecompressionSubtractType::full, DecompressionSubtractType::scalar, DecompressionSubtractType::empty};
const std::vector<DecompressionSubtractType> decompression_subtract_type = {DecompressionSubtractType::full,
DecompressionSubtractType::scalar,
DecompressionSubtractType::empty};
const std::vector<bool> reshape_on_decompression = {true, false};
const std::vector<ov::test::ElementType> decompression_precisions_corner_cases = {ov::element::f16, ov::element::f32};

@@ -70,12 +78,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases,
::testing::ValuesIn(reshape_on_decompression),
::testing::ValuesIn(filter_additional_config_basic()),
::testing::Values(emptyFusingSpec),
::testing::Values(true)),
::testing::Values(should_use_decompression_impl())),
MatmulWeightsDecompression::getTestCaseName);

} // namespace

} // namespace test
} // namespace ov

#endif
@@ -9,33 +9,6 @@ using namespace CPUTestUtils;

namespace ov {
namespace test {
/*
* WP - weights precision
* DP - decompression precision
* IP - input precision
* SP - scale precision
* Opt - optional
* Subtract_const(WP)
* /
* Weights(WP) Convert(DP)
* | / Multiply_const(SP)
* Convert(DP) Reshape (Opt) /
* \ / Convert(if SP != DP)
* Subtract(Opt) /
* \ Reshape (Opt)
* \ /
* Multiply
* |
* Reshape (in case of group decompression)
* |
* Convert (if IP != DP)
* |
* Data(IP) Transpose(Opt)
* \ /
* Matmul
* |
* Bias
*/

std::string MatmulWeightsDecompression::getTestCaseName(testing::TestParamInfo<MatmulWeightsDecompressionParams> obj) {
MatMulDecompressionShapeParams shape_params;
@@ -144,7 +117,7 @@ void MatmulWeightsDecompression::SetUp() {

if (configuration.count(ov::hint::inference_precision.name()) &&
configuration.at(ov::hint::inference_precision.name()) == ov::element::f16) {
abs_threshold = 0.1;
abs_threshold = 0.2;
}

ElementType netType = ov::element::f32;
@@ -1,32 +1,56 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// #include "openvino/opsets/opset13.hpp"
// #include "openvino/pass/manager.hpp"
// #include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp"

#include "common_test_utils/ov_tensor_utils.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "shared_test_classes/subgraph/weights_decompression_builders.hpp"
#include "utils/cpu_test_utils.hpp"
#include "utils/fusing_test_utils.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"
#include "shared_test_classes/subgraph/weights_decompression_builders.hpp"

using namespace CPUTestUtils;

namespace ov {
namespace test {

/*
* WP - weights precision
* DP - decompression precision
* IP - input precision
* SP - scale precision
* Opt - optional
* Subtract_const(WP)
* /
* Weights(WP) Convert(DP)
* | / Multiply_const(SP)
* Convert(DP) Reshape (Opt) /
* \ / Convert(if SP != DP)
* Subtract(Opt) /
* \ Reshape (Opt)
* \ /
* Multiply
* |
* Reshape (in case of group decompression)
* |
* Convert (if IP != DP)
* |
* Data(IP) Transpose(Opt)
* \ /
* Matmul
* |
* Bias
*/
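
For readers unfamiliar with the pattern, here is a minimal sketch (not part of the diff) of the graph above built with the public OpenVINO C++ API. It assumes u8 weights, f32 decompression precision, and per-output-channel scales, and omits the optional Reshape, Transpose, and Bias branches; all shapes and values are illustrative:

```cpp
#include <memory>
#include <vector>

#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/matmul.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/subtract.hpp"

std::shared_ptr<ov::Model> make_weights_decompression_matmul() {
    // Data(IP): f32 activations.
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 4, 16});
    // Weights(WP): u8 compressed weights, converted to the decompression precision (DP).
    auto weights = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{16, 32},
                                                std::vector<uint8_t>(16 * 32, 128));
    auto weights_dp = std::make_shared<ov::op::v0::Convert>(weights, ov::element::f32);
    // Subtract(Opt): zero point stored in WP, converted to DP before the subtract.
    auto zp = ov::op::v0::Constant::create(ov::element::u8, ov::Shape{1, 32}, {128});
    auto zp_dp = std::make_shared<ov::op::v0::Convert>(zp, ov::element::f32);
    auto centered = std::make_shared<ov::op::v1::Subtract>(weights_dp, zp_dp);
    // Multiply: per-output-channel scales (SP == DP here, so no extra Convert).
    auto scales = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 32}, {0.01f});
    auto decompressed = std::make_shared<ov::op::v1::Multiply>(centered, scales);
    // Matmul over the decompressed weights.
    auto matmul = std::make_shared<ov::op::v0::MatMul>(data, decompressed, false, false);
    return std::make_shared<ov::Model>(ov::OutputVector{matmul}, ov::ParameterVector{data});
}
```

The weights-decompression tests in this commit exercise exactly this kind of subgraph, with the trailing tuple element deciding whether the dedicated decompression implementation is expected to be used.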
typedef std::tuple<MatMulDecompressionShapeParams,
ov::test::ElementType, // weights precision
ov::test::ElementType, // decompression precision
ov::test::ElementType, // scale precision
bool, // transpose on weights
DecompressionSubtractType, // decompression subtract type
bool, // reshape on decompression constants
ov::AnyMap, // additional config
fusingSpecificParams,
bool> // should use decompression implementation
MatmulWeightsDecompressionParams;
ov::test::ElementType, // weights precision
ov::test::ElementType, // decompression precision
ov::test::ElementType, // scale precision
bool, // transpose on weights
DecompressionSubtractType, // decompression subtract type
bool, // reshape on decompression constants
ov::AnyMap, // additional config
fusingSpecificParams,
bool> // should use decompression implementation
MatmulWeightsDecompressionParams;

class MatmulWeightsDecompression : public testing::WithParamInterface<MatmulWeightsDecompressionParams>,
virtual public SubgraphBaseTest,
