Skip to content

Enable MKLDNN OP fusion for convolution - rebase to latest master #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6ce181d
add graph rewrite pass for mkldnn conv fusion
chunyuan-w Apr 28, 2022
d1e0418
add OP context for mkldnn conv fusion
chunyuan-w Apr 28, 2022
e2c61ab
add impl of mkldnn conv2d_prepack and conv2d_run
chunyuan-w Apr 28, 2022
f6cc10a
add mkldnn_prepacked::conv2d_run into NNC via external call
chunyuan-w Apr 28, 2022
8ca0527
add UT for mkldnn conv fusion
chunyuan-w Apr 28, 2022
9467d6a
fix variable_excluded_from_dispatch check
chunyuan-w Apr 28, 2022
7b591c6
fix !AT_MKLDNN_ENABLED()
chunyuan-w Apr 29, 2022
22252ee
use template for fusion rewriter pattern
chunyuan-w Apr 29, 2022
86976e9
do not support mkldnn fusion op in NNC if context is not a Constant
chunyuan-w Apr 29, 2022
a896f4f
rename conv2d to conv
chunyuan-w May 5, 2022
f46c7df
fix lint
chunyuan-w May 5, 2022
32a73e3
remove hard-coded dim=2 for conv2d
chunyuan-w May 5, 2022
e56c272
rename NNC mkldnn conv2d to conv
chunyuan-w May 5, 2022
5d71c6f
fix comment header in UT
chunyuan-w May 5, 2022
66d0047
add outputStrides into NNC compute func after merging latest master
chunyuan-w May 5, 2022
a1a96e2
use a map for PostOp fusion
chunyuan-w May 5, 2022
c388b7a
do not rewrite channels last
chunyuan-w May 7, 2022
0dab23d
add fusion for relu_
chunyuan-w May 7, 2022
0cf0473
graph rewrite: exclude cases where MKLDNN performs worse than native
chunyuan-w May 10, 2022
7778d0a
remove memcpy in nnc_mkldnn_prepacked_conv_run
chunyuan-w May 9, 2022
ddaf3a6
put FuseMkldnn inside NNC compile func
chunyuan-w May 10, 2022
b8dc3bb
fix UT
chunyuan-w May 10, 2022
7cd634f
fix registration when USE_MKLDNN=0
chunyuan-w May 10, 2022
b98f295
remove mkldnnConvFusionIsSupported
chunyuan-w May 10, 2022
cf18fd2
remove duplicated useMkldnnForConvShape
chunyuan-w May 10, 2022
0020ce0
Rename pass to FuseConvWithEltwise
chunyuan-w May 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions aten/src/ATen/native/mkldnn/Common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#pragma once

#include <ATen/ATen.h>
#include <ATen/Config.h>

#if AT_MKLDNN_ENABLED()

#include <ideep/tensor.hpp>

namespace at {
namespace native {
namespace mkldnn {

// Prepacked state for a fused MKLDNN convolution: the blocked-layout weight,
// optional bias, expanded conv hyper-parameters, and the post-op attributes
// (e.g. a fused relu) to apply at run time.
struct ContextConv final {
  ideep::tensor weight_packed_;
  c10::optional<at::Tensor> at_bias_;
  std::vector<int64_t> padding_;
  std::vector<int64_t> stride_;
  std::vector<int64_t> dilation_;
  int64_t groups_;
  ideep::attr_t attr_;

  ContextConv() = delete;

  // Sink constructor: all by-value parameters are moved into the members
  // (the originals previously were copied, wasting three vector allocations
  // plus an attr copy per prepack).
  ContextConv(
      ideep::tensor&& weight_packed,
      c10::optional<at::Tensor> at_bias,
      std::vector<int64_t> padding,
      std::vector<int64_t> stride,
      std::vector<int64_t> dilation,
      int64_t groups,
      ideep::attr_t attr)
      : weight_packed_(std::move(weight_packed)),
        at_bias_(std::move(at_bias)),
        padding_(std::move(padding)),
        stride_(std::move(stride)),
        dilation_(std::move(dilation)),
        groups_(groups),
        attr_(std::move(attr)) {}
};

} // namespace mkldnn
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
243 changes: 243 additions & 0 deletions aten/src/ATen/native/mkldnn/ConvPrepack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
#include <vector>

#include <ATen/native/ConvUtils.h>
#include <ATen/native/mkldnn/Common.h>
#include <ATen/native/mkldnn/ConvPrepack.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/utils/Factory.h>
#include <ATen/native/utils/ParamUtils.h>
#include <torch/csrc/jit/passes/mkldnn_rewrite.h>
#include <c10/util/irange.h>

#if AT_MKLDNN_ENABLED()

namespace at {
namespace native {
namespace mkldnn {
namespace internal {
namespace convolution {

using torch::jit::mkldnn::fusion_attr_map;

// Creates a prepacked conv op context from the serialized conv parameters.
// `attr` names the requested post-op fusion (looked up in fusion_attr_map);
// an unknown name is a hard error.
c10::intrusive_ptr<mkldnn::ConvOpContext> createConvPrePackOpContext(
    Tensor weight,
    c10::optional<Tensor> bias,
    std::vector<int64_t> stride,
    std::vector<int64_t> padding,
    std::vector<int64_t> dilation,
    int64_t groups,
    std::vector<int64_t> input_size,
    std::string attr) {
  const auto attr_it = fusion_attr_map.find(attr);
  TORCH_CHECK(attr_it != fusion_attr_map.end(), "Fusion behavior undefined.");

  return mkldnn::MkldnnConvOpContext::create_context(
      std::move(weight),
      std::move(bias),
      std::move(padding),
      std::move(stride),
      std::move(dilation),
      groups,
      std::move(input_size),
      attr_it->second.op_attr);
}

// Prepacks `weight` into the MKLDNN blocked layout that
// ideep::convolution_forward expects for this conv configuration, and bundles
// it with the (dim-expanded) conv parameters into a ContextConv.
ContextConv create(
    const Tensor& weight,
    const c10::optional<Tensor>& bias,
    const IntArrayRef padding,
    const IntArrayRef stride,
    const IntArrayRef dilation,
    const int64_t groups,
    const IntArrayRef input_size,
    const ideep::attr_t& attr) {
  auto k = weight.ndimension();
  // Number of spatial dims: weight is (out_channels, in_channels/groups, *spatial).
  int64_t dim = k - 2;
  const auto padding_expanded = expand_param_if_needed(padding, "padding", dim);
  const auto stride_expanded = expand_param_if_needed(stride, "stride", dim);
  const auto dilation_expanded =
      expand_param_if_needed(dilation, "dilation", dim);
  const auto input_size_expanded =
      expand_param_if_needed(input_size, "input_size", k);

  // Keep autograd out of the raw ideep view creation below.
  c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
  auto w = itensor_view_from_dense(weight);
  // Query oneDNN for the optimal (blocked) weight layout for this shape.
  ideep::tensor::desc expected_weight_desc =
      ideep::convolution_forward::expected_weights_desc(
          w.get_dims(),
          w.get_data_type(),
          {stride_expanded.begin(), stride_expanded.end()},
          {padding_expanded.begin(), padding_expanded.end()},
          {padding_expanded.begin(), padding_expanded.end()},
          {dilation_expanded.begin(), dilation_expanded.end()},
          groups,
          ideep::algorithm::convolution_direct,
          ideep::prop_kind::forward,
          /*x_dtype*/ w.get_data_type(),
          {input_size_expanded.begin(), input_size_expanded.end()});

  // Reorder the plain-layout weight into the expected blocked layout.
  ideep::tensor packed_weight;
  packed_weight.init(expected_weight_desc);
  packed_weight.feed_from(w);

  return ContextConv{
      std::move(packed_weight),
      bias.has_value() ? c10::make_optional(*bias) : c10::nullopt,
      {padding_expanded.begin(), padding_expanded.end()},
      {stride_expanded.begin(), stride_expanded.end()},
      {dilation_expanded.begin(), dilation_expanded.end()},
      groups,
      // `attr` is a const reference: std::move(attr) was a misleading no-op
      // (it still copied); pass it plainly.
      attr};
}

// Low-level dispatch to ideep::convolution_forward::compute, choosing the
// overload with or without a bias tensor. `attr` carries fused post-ops
// (e.g. relu); the empty scale_t() arguments mean no quantization scaling.
// NOTE(review): `padding` is passed twice — presumably as separate
// left/right padding with symmetric values assumed; confirm against the
// ideep compute signature.
void _mkldnn_convolution_out(
const ideep::tensor& x,
ideep::tensor& y,
const ideep::tensor& w,
const c10::optional<ideep::tensor>& b,
IntArrayRef padding,
IntArrayRef stride,
IntArrayRef dilation,
IntArrayRef output_sizes,
int64_t groups,
const ideep::attr_t& attr = ideep::attr_t()) {
if (b.has_value()) {
// Bias present: compute overload that takes the bias tensor.
ideep::convolution_forward::compute<true>(
x,
w,
b.value(),
{output_sizes.cbegin(), output_sizes.cend()},
y,
{stride.begin(), stride.end()},
{dilation.begin(), dilation.end()},
{padding.begin(), padding.end()},
{padding.begin(), padding.end()},
groups,
ideep::scale_t(),
ideep::scale_t(),
ideep::scale_t(),
attr);
} else {
// No bias: same call without the bias argument.
ideep::convolution_forward::compute<true>(
x,
w,
{output_sizes.cbegin(), output_sizes.cend()},
y,
{stride.begin(), stride.end()},
{dilation.begin(), dilation.end()},
{padding.begin(), padding.end()},
{padding.begin(), padding.end()},
groups,
ideep::scale_t(),
ideep::scale_t(),
ideep::scale_t(),
attr);
}
}

void mkldnn_convolution_out(
const Tensor& input,
ideep::tensor& mkldnn_output,
const ideep::tensor& mkldnn_weight,
const c10::optional<Tensor>& bias_opt,
IntArrayRef padding,
IntArrayRef stride,
IntArrayRef dilation,
IntArrayRef output_sizes,
int64_t groups,
const ideep::attr_t& attr = ideep::attr_t()) {
c10::MaybeOwned<Tensor> bias_maybe_owned =
at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;

c10::impl::ExcludeDispatchKeyGuard edkg(c10::autograd_dispatch_keyset);
const ideep::tensor mkldnn_input = itensor_from_tensor(input);
c10::optional<ideep::tensor> mkldnn_bias{c10::nullopt};
if (bias.defined()) {
mkldnn_bias = itensor_from_tensor(bias);
}

_mkldnn_convolution_out(
mkldnn_input,
mkldnn_output,
mkldnn_weight,
mkldnn_bias,
padding,
stride,
dilation,
output_sizes,
groups,
attr);
}

// Computes the conv output shape for `input` from the packed weight's dims
// and the padding/stride/dilation stored in `context`.
std::vector<int64_t> get_output_sizes(
    ContextConv& context,
    const Tensor& input) {
  const auto kernel_size = context.weight_packed_.get_dims();
  const std::vector<int64_t> input_size = input.sizes().vec();
  return conv_output_size(
      input_size,
      kernel_size,
      context.padding_,
      context.stride_,
      context.dilation_);
}

// Runs the prepacked convolution on `input`, allocating the output tensor in
// the input's suggested memory format and returning it.
Tensor run(ContextConv& context, const Tensor& input) {
  const auto output_sizes = get_output_sizes(context, input);

  c10::impl::ExcludeDispatchKeyGuard no_autograd(c10::autograd_dispatch_keyset);
  auto output = at::empty(
      output_sizes,
      input.options().memory_format(input.suggest_memory_format()));
  // View the freshly allocated ATen tensor as an ideep tensor (no copy).
  ideep::tensor y = itensor_view_from_dense(output);

  mkldnn_convolution_out(
      input,
      y,
      context.weight_packed_,
      context.at_bias_,
      context.padding_,
      context.stride_,
      context.dilation_,
      output_sizes,
      context.groups_,
      context.attr_);
  return output;
}

// Runs the prepacked convolution on `input`, writing the result into the
// caller-provided raw buffer `output` (assumed large enough for the computed
// output shape at the input's dtype) — this avoids allocating an ATen tensor.
void run(ContextConv& context, const Tensor& input, void* output) {
  const std::vector<int64_t> output_sizes = get_output_sizes(context, input);

  // Wrap the raw buffer as an ideep tensor of the expected shape and dtype.
  ideep::tensor::desc out_desc{
      output_sizes, get_mkldnn_dtype(input.scalar_type())};
  ideep::tensor y{out_desc, output};

  mkldnn_convolution_out(
      input,
      y,
      context.weight_packed_,
      context.at_bias_,
      context.padding_,
      context.stride_,
      context.dilation_,
      output_sizes,
      context.groups_,
      context.attr_);
}

// Registered-op entry point: runs the convolution stored in `op_context` on
// `input` and returns a freshly allocated output tensor.
Tensor conv_run(
const Tensor& input,
const c10::intrusive_ptr<mkldnn::ConvOpContext>& op_context) {
return op_context->run(input);
}

} // namespace convolution
} // namespace internal
} // namespace mkldnn
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
49 changes: 49 additions & 0 deletions aten/src/ATen/native/mkldnn/ConvPrepack.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#pragma once

#include <ATen/Tensor.h>
#include <ATen/native/mkldnn/Common.h>
#include <ATen/native/mkldnn/OpContext.h>

#if AT_MKLDNN_ENABLED()

namespace at {
namespace native {
namespace mkldnn {
namespace internal {
namespace convolution {

// Creates a prepacked conv op context; `attr` names the post-op fusion
// (looked up in fusion_attr_map) to bake into the context.
c10::intrusive_ptr<mkldnn::ConvOpContext> createConvPrePackOpContext(
Tensor weight,
c10::optional<Tensor> bias,
std::vector<int64_t> stride,
std::vector<int64_t> padding,
std::vector<int64_t> dilation,
int64_t groups,
std::vector<int64_t> input_size,
std::string attr);

// Runs the convolution stored in `op_context` on `input`, returning a new
// output tensor.
Tensor conv_run(
const Tensor& input,
const c10::intrusive_ptr<mkldnn::ConvOpContext>& op_context);

// Prepacks `weight` into MKLDNN blocked layout and bundles it with the
// (dim-expanded) conv parameters into a ContextConv.
ContextConv create(
const Tensor& weight,
const c10::optional<Tensor>& bias,
const IntArrayRef padding,
const IntArrayRef stride,
const IntArrayRef dilation,
const int64_t groups,
const IntArrayRef input_size,
const ideep::attr_t& attr);

// Runs the prepacked convolution, allocating and returning the output tensor.
Tensor run(ContextConv& context, const Tensor& input);

// Runs the prepacked convolution, writing into the caller-provided raw
// buffer `output` (must be large enough for the computed output shape).
void run(ContextConv& context, const Tensor& input, void* output);

} // namespace convolution
} // namespace internal
} // namespace mkldnn
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
47 changes: 47 additions & 0 deletions aten/src/ATen/native/mkldnn/OpContext.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include <ATen/native/mkldnn/ConvPrepack.h>
#include <ATen/native/mkldnn/OpContext.h>

#if AT_MKLDNN_ENABLED()

namespace at {
namespace native {
namespace mkldnn {

// Factory: prepacks the weight (via internal::convolution::create) and wraps
// it, together with the original conv parameters, into a refcounted
// MkldnnConvOpContext.
c10::intrusive_ptr<ConvOpContext> MkldnnConvOpContext::create_context(
at::Tensor&& weight,
c10::optional<at::Tensor>&& bias,
std::vector<int64_t>&& padding,
std::vector<int64_t>&& stride,
std::vector<int64_t>&& dilation,
int64_t groups,
std::vector<int64_t>&& input_size,
const ideep::attr_t& attr) {
// `create` takes its arguments by const reference/view, so the std::moves
// below still see the intact values — the ordering here is deliberate.
auto op_context = mkldnn::internal::convolution::create(
weight, bias, padding, stride, dilation, groups, input_size, attr);

auto conv_op_context = c10::make_intrusive<MkldnnConvOpContext>(
std::move(weight),
std::move(bias),
std::move(padding),
std::move(stride),
std::move(dilation),
groups,
std::move(input_size),
std::move(op_context));

return conv_op_context;
}

// Allocating overload: delegates to the internal run that returns a new
// output tensor, using the ContextConv captured at prepack time.
Tensor MkldnnConvOpContext::run(const Tensor& input) {
return mkldnn::internal::convolution::run(op_context_, input);
}

// Preallocated-output overload: writes the result into the caller-provided
// raw buffer `output` (used by the NNC external-call path).
void MkldnnConvOpContext::run(const Tensor& input, void* output) {
return mkldnn::internal::convolution::run(op_context_, input, output);
}

} // namespace mkldnn
} // namespace native
} // namespace at

#endif // AT_MKLDNN_ENABLED()
Loading