Merge branch 'develop' into export_symbols

apwojcik authored Jun 3, 2024
2 parents 0bce4cf + 5822497 commit 380d8da
Showing 14 changed files with 670 additions and 88 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/rocm-image-release.yaml
@@ -18,6 +18,10 @@ on:
         description: Docker image name for rocm Docker build
         required: true
         default: "rocm-migraphx"
+      branch_name:
+        description: branch to use for building base ROCm image
+        required: true
+        default: "develop"
       build_navi:
         description: Build navi number
         required: true
@@ -35,6 +39,7 @@ jobs:
       benchmark-utils_repo: ${{ github.event.inputs.benchmark-utils_repo || 'ROCm/migraphx-benchmark-utils' }}
       base_image: ${{ github.event.inputs.base_image || 'rocm/dev-ubuntu-20.04' }}
       docker_image: ${{ github.event.inputs.docker_image || 'rocm-migraphx' }}
+      branch_name: ${{ github.event.inputs.branch_name || 'develop' }}
       build_navi: ${{ github.event.inputs.build_navi || '0' }}
       overwrite: ${{ github.event.inputs.overwrite == 'true' }}
     secrets:
2 changes: 1 addition & 1 deletion .github/workflows/sync_rocMLIR.yaml
@@ -4,7 +4,7 @@ on:
   schedule:
     - cron: '0 7 * * sun'
   pull_request:
-    branches: [develop]
+    branches: [rocMLIR-sync-*]
     types: [synchronize, closed]
   workflow_dispatch:
     inputs:
198 changes: 135 additions & 63 deletions src/onnx/parse_split.cpp
@@ -34,6 +34,131 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace onnx {

+auto parse_dyn_split(const onnx_parser::node_info& info,
+                     const std::vector<instruction_ref>& args,
+                     int64_t tuned_axis)
+{
+    if(contains(info.attributes, "split"))
+    {
+        MIGRAPHX_THROW("PARSE_SPLIT: dynamic input and non-fixed split axis and `split` "
+                       "attribute not supported");
+    }
+    if(args.size() == 2)
+    {
+        MIGRAPHX_THROW("PARSE_SPLIT: dynamic input and non-fixed split axis and `split` "
+                       "input not supported");
+    }
+
+    std::size_t num_outputs = info.num_outputs;
+    std::vector<instruction_ref> ret_ins(num_outputs);
+
+    // Doing shape calculations for the splits in the graph
+    auto split_dim = info.add_instruction(
+        make_op("dimensions_of", {{"start", tuned_axis}, {"end", tuned_axis + 1}}), args[0]);
+    shape int64_scalar_shape{shape::int64_type, {1}, {0}};
+    auto num_outputs_lit = info.add_literal(literal{int64_scalar_shape, {num_outputs}});
+    auto num_outputs_minus_1_lit = info.add_literal(literal{int64_scalar_shape, {num_outputs - 1}});
+    // (A + (B - 1)) / B == ceil(A / B)
+    auto chunk_size = info.add_instruction(
+        make_op("div"),
+        info.add_instruction(make_op("add"), split_dim, num_outputs_minus_1_lit),
+        num_outputs_lit);
+    for(int n = 0; n < num_outputs - 1; ++n)
+    {
+        // slice(input, starts = {n * chunk_size}, ends = {(n+1) * chunk_size}); axes =
+        // {tuned_axis}
+        ret_ins.at(n) = info.add_instruction(
+            make_op("slice", {{"axes", {tuned_axis}}}),
+            args[0],
+            info.add_instruction(
+                make_op("mul"), chunk_size, info.add_literal(literal{int64_scalar_shape, {n}})),
+            info.add_instruction(make_op("mul"),
+                                 chunk_size,
+                                 info.add_literal(literal{int64_scalar_shape, {n + 1}})));
+    }
+    // last slice: slice(input, starts = {n * chunk_size}); ends = max_int, axes =
+    // {tuned_axis}
+    ret_ins.at(num_outputs - 1) = info.add_instruction(
+        make_op("slice", {{"axes", {tuned_axis}}, {"ends", {std::numeric_limits<int64_t>::max()}}}),
+        args[0],
+        info.add_instruction(make_op("mul"),
+                             chunk_size,
+                             info.add_literal(literal{int64_scalar_shape, {num_outputs - 1}})));
+    return ret_ins;
+}
+
+auto parse_static_split(const onnx_parser::node_info& info,
+                        const onnx_parser& parser,
+                        const std::vector<instruction_ref>& args,
+                        int64_t tuned_axis)
+{
+    const auto& input_shape = args[0]->get_shape();
+    // either static shape or fixed dynamic_dimension for split axis
+    auto tuned_axis_len = input_shape.to_static(0).lens().at(tuned_axis);
+    std::vector<int64_t> vec_splits;
+    if(contains(info.attributes, "split"))
+    {
+        literal s = parser.parse_value(info.attributes.at("split"));
+        s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
+    }
+    else if(args.size() == 2)
+    {
+        auto s = args[1]->eval();
+        check_arg_empty(s, "PARSE_SPLIT: non-constant `split` input is not supported");
+        s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
+    }
+    // no split attribute, input is equally divided
+    else
+    {
+        std::size_t num_outputs = info.num_outputs;
+        // the num_outputs attribute seems to be redundant since we already have
+        // node_info::num_outputs, but we can still perform an error check
+        if(contains(info.attributes, "num_outputs"))
+        {
+            num_outputs = parser.parse_value(info.attributes.at("num_outputs")).at<std::size_t>();
+            if(num_outputs != info.num_outputs)
+            {
+                MIGRAPHX_THROW("PARSE_SPLIT: num_outputs attribute " +
+                               std::to_string(num_outputs) +
+                               " doesn't match actual number of outputs " +
+                               std::to_string(info.num_outputs) + "!");
+            }
+        }
+        if(tuned_axis_len % num_outputs == 0)
+        {
+            std::size_t chunk_size = tuned_axis_len / num_outputs;
+            vec_splits.resize(num_outputs, chunk_size);
+        }
+        else
+        {
+            std::size_t chunk_size      = tuned_axis_len / num_outputs + 1;
+            std::size_t last_chunk_size = tuned_axis_len - chunk_size * (num_outputs - 1);
+            vec_splits.resize(num_outputs - 1, chunk_size);
+            vec_splits.push_back(last_chunk_size);
+        }
+    }
+
+    if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
+       static_cast<int64_t>(tuned_axis_len))
+    {
+        MIGRAPHX_THROW(
+            "PARSE_SPLIT: sum of split attribute unequal to dim size of axis! tuned axis:" +
+            std::to_string(tuned_axis_len) + " Output " + to_string_range(vec_splits) + " Rank " +
+            std::to_string(input_shape.ndim()));
+    }
+
+    std::vector<instruction_ref> ret_ins;
+    int64_t start = 0;
+    for(auto sl : vec_splits)
+    {
+        ret_ins.push_back(info.add_instruction(
+            make_op("slice", {{"axes", {tuned_axis}}, {"starts", {start}}, {"ends", {start + sl}}}),
+            args[0]));
+        start += sl;
+    }
+
+    return ret_ins;
+}
+
 struct parse_split : op_parser<parse_split>
 {
     std::vector<op_desc> operators() const { return {{"Split"}}; }
@@ -49,75 +174,22 @@ struct parse_split : op_parser<parse_split>
             axis = parser.parse_value(info.attributes.at("axis")).at<int>();
         }

-        auto lens          = args[0]->get_shape().lens();
-        int64_t n_rank     = lens.size();
-        int64_t tuned_axis = tune_axis(n_rank, axis, opd.op_name);
+        const auto& input_shape = args[0]->get_shape();
+        // axis over which the split occurs (split_axis)
+        int64_t tuned_axis = tune_axis(input_shape.ndim(), axis, opd.op_name);

-        std::vector<int64_t> vec_splits;
-        if(contains(info.attributes, "split"))
-        {
-            literal s = parser.parse_value(info.attributes.at("split"));
-            s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
-        }
-        else if(args.size() == 2)
-        {
-            auto s = args[1]->eval();
-            check_arg_empty(s, "Split: dynamic shape is not supported");
-            s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
-        }
-        // no split attribute, input is equally divided
-        else
-        {
-            std::size_t num_outputs = info.num_outputs;
-            // the num_outputs attribute seems to be redundant since we already have
-            // node_info::num_outputs, but we can still perform an error check
-            if(contains(info.attributes, "num_outputs"))
-            {
-                num_outputs =
-                    parser.parse_value(info.attributes.at("num_outputs")).at<std::size_t>();
-                if(num_outputs != info.num_outputs)
-                {
-                    MIGRAPHX_THROW("PARSE_SPLIT: num_outputs attribute " +
-                                   std::to_string(num_outputs) +
-                                   " doesn't match actual number of outputs " +
-                                   std::to_string(info.num_outputs) + "!");
-                }
-            }
-
-            if(lens[tuned_axis] % num_outputs == 0)
-            {
-                std::size_t chunk_size = lens[tuned_axis] / num_outputs;
-                vec_splits.resize(num_outputs, chunk_size);
-            }
-            else
-            {
-                std::size_t chunk_size      = lens[tuned_axis] / num_outputs + 1;
-                std::size_t last_chunk_size = lens[tuned_axis] - chunk_size * (num_outputs - 1);
-                vec_splits.resize(num_outputs - 1, chunk_size);
-                vec_splits.push_back(last_chunk_size);
-            }
-        }
+        auto split_axis_is_fixed = [&]() {
+            return input_shape.dyn_dims().at(tuned_axis).is_fixed();
+        };

-        if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
-           static_cast<int64_t>(lens[tuned_axis]))
+        if(input_shape.dynamic() and not split_axis_is_fixed())
         {
-            MIGRAPHX_THROW(
-                "PARSE_SPLIT: sum of split attribute unequal to dim size of axis! tuned axis:" +
-                std::to_string(lens[tuned_axis]) + " Output " + to_string_range(vec_splits) +
-                " Rank " + std::to_string(n_rank) + " Len outs " + to_string_range(lens));
+            return parse_dyn_split(info, args, tuned_axis);
         }
-
-        std::vector<instruction_ref> ret_ins;
-        int64_t start = 0;
-        for(auto sl : vec_splits)
+        else
         {
-            ret_ins.push_back(info.add_instruction(
-                make_op("slice", {{"axes", {axis}}, {"starts", {start}}, {"ends", {start + sl}}}),
-                args[0]));
-            start += sl;
+            return parse_static_split(info, parser, args, tuned_axis);
         }
-
-        return ret_ins;
     }
 };
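For context on the dynamic path: parse_dyn_split cannot see the split-axis length at parse time, so it emits dimensions_of/add/div/mul instructions that evaluate the ceiling-division slice bounds at runtime. A minimal host-side sketch of the same arithmetic (standalone C++; split_bounds is an illustrative name, not a MIGraphX API):

#include <cstdint>
#include <limits>
#include <utility>
#include <vector>

// Sketch: the [start, end) bounds that the emitted instructions compute
// once the split-axis length is known. (A + (B - 1)) / B == ceil(A / B)
// for positive integers A and B.
std::vector<std::pair<int64_t, int64_t>> split_bounds(int64_t axis_len, int64_t num_outputs)
{
    int64_t chunk = (axis_len + num_outputs - 1) / num_outputs;
    std::vector<std::pair<int64_t, int64_t>> bounds;
    for(int64_t n = 0; n < num_outputs - 1; ++n)
        bounds.emplace_back(n * chunk, (n + 1) * chunk);
    // Mirrors the last slice's ends = max_int: the runtime shape clamps it,
    // so the final chunk absorbs any remainder.
    bounds.emplace_back((num_outputs - 1) * chunk, std::numeric_limits<int64_t>::max());
    return bounds;
}
// e.g. axis_len = 10, num_outputs = 3 -> chunk = 4: {0,4}, {4,8}, {8,max->10}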

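On the static path, when neither the `split` attribute nor a second input is present, the axis is divided as evenly as possible: ceil-sized chunks with a smaller remainder chunk at the end. A worked check of that fallback (standalone C++; equal_splits is an illustrative name):

#include <cassert>
#include <cstddef>
#include <vector>

// Mirrors parse_static_split's equal-division fallback.
std::vector<std::size_t> equal_splits(std::size_t axis_len, std::size_t num_outputs)
{
    std::vector<std::size_t> splits;
    if(axis_len % num_outputs == 0)
    {
        splits.assign(num_outputs, axis_len / num_outputs);
    }
    else
    {
        std::size_t chunk = axis_len / num_outputs + 1; // ceil, since axis_len is not divisible
        splits.assign(num_outputs - 1, chunk);
        splits.push_back(axis_len - chunk * (num_outputs - 1)); // remainder chunk
    }
    return splits;
}

int main()
{
    auto s = equal_splits(10, 3); // {4, 4, 2}; the parser's sum check would pass
    assert(s.size() == 3 && s[0] == 4 && s[1] == 4 && s[2] == 2);
    return 0;
}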
100 changes: 80 additions & 20 deletions src/simplify_reshapes.cpp
@@ -1012,26 +1012,28 @@ struct find_scalar_multibroadcast_reshape_or_transpose
     }
 };

-struct find_reshape_reshape_dot
+struct find_reshape_dot
 {
     auto matcher() const
     {
-        return match::name("dot")(match::used_once(),
-                                  match::args(match::name("reshape").bind("inp_rsp1"),
-                                              match::name("reshape").bind("inp_rsp2")));
+        return match::name("dot")(
+            match::used_once(),
+            match::either_arg(0, 1)(match::name("reshape").bind("rsp"),
+                                    match::skip_broadcasts(match::any().bind("other"))));
     }

     // Gemm axis should not be altered by the reshape
-    auto is_valid_reshape(instruction_ref in, instruction_ref rsp) const
+    auto is_valid_reshape(instruction_ref inp, instruction_ref rsp, size_t dot_axis) const
     {
-        auto in_lens  = in->get_shape().lens();
+        auto inp_lens = inp->get_shape().lens();
         auto rsp_lens = rsp->get_shape().lens();

-        return std::equal(rsp_lens.end() - 2, rsp_lens.end(), in_lens.end() - 2, in_lens.end());
+        return (inp_lens.size() >= dot_axis and
+                rsp_lens[rsp_lens.size() - dot_axis] == inp_lens[inp_lens.size() - dot_axis]);
     }

-    // Batch dims should match for both inputs
-    auto is_valid_inputs(instruction_ref in1, instruction_ref in2) const
+    // Same batch dims
+    auto has_same_batch_dims(instruction_ref in1, instruction_ref in2) const
     {
         auto in1_lens = in1->get_shape().lens();
         auto in2_lens = in2->get_shape().lens();
@@ -1043,21 +1045,79 @@
     void apply(module& m, const match::matcher_result& r) const
     {
-        auto dot      = r.result;
-        auto inp_rsp1 = r.instructions["inp_rsp1"];
-        auto inp_rsp2 = r.instructions["inp_rsp2"];
+        auto dot   = r.result;
+        auto rsp   = r.instructions["rsp"];
+        auto other = r.instructions["other"];

-        auto dot_lens = dot->get_shape().lens();
+        auto rsp_lens = rsp->get_shape().lens();
+        auto inp      = rsp->inputs().front();
+        auto inp_lens = inp->get_shape().lens();

-        auto inp1 = inp_rsp1->inputs().front();
-        auto inp2 = inp_rsp2->inputs().front();
+        // Gemm axis should not be altered by the reshape
+        bool flipped    = rsp == dot->inputs().back();
+        size_t dot_axis = (flipped) ? 2 : 1;

-        if(not(is_valid_reshape(inp1, inp_rsp1) and is_valid_reshape(inp2, inp_rsp2) and
-               is_valid_inputs(inp1, inp2)))
+        if(not is_valid_reshape(inp, rsp, dot_axis))
             return;

-        auto new_dot = m.insert_instruction(dot, dot->get_operator(), inp1, inp2);
-        m.replace_instruction(dot, make_op("reshape", {{"dims", dot_lens}}), new_dot);
+        instruction_ref new_other;
+        if(other->get_operator().name() == "reshape")
+        {
+            auto other_inp        = other->inputs().front();
+            size_t other_dot_axis = (flipped) ? 1 : 2;
+            if(not is_valid_reshape(other_inp, other, other_dot_axis) or
+               not has_same_batch_dims(inp, other_inp))
+                return;
+
+            new_other = other_inp;
+        }
+        else
+        {
+            auto other_lens = other->get_shape().lens();
+            if(other_lens.size() > 2)
+                return;
+
+            std::vector<size_t> new_other_lens{inp_lens.begin(), inp_lens.end() - 2};
+            operation new_bc_op;
+
+            auto bc_other      = (flipped) ? dot->inputs().front() : dot->inputs().back();
+            auto bc_other_lens = bc_other->get_shape().lens();
+            new_other_lens.insert(
+                new_other_lens.end(), bc_other_lens.end() - 2, bc_other_lens.end());
+
+            // if the original weight is one dimensional, look at the original broadcast
+            // to determine the correct broadcast axis
+            if(other_lens.size() == 1)
+            {
+                auto bc_other_strides = bc_other->get_shape().strides();
+                auto it               = std::find_if(bc_other_strides.begin(),
+                                       bc_other_strides.end(),
+                                       [&](auto i) { return i != 0; });
+                auto orig_bc_axis     = std::distance(bc_other_strides.begin(), it);
+
+                auto new_bc_axis = new_other_lens.size() - (bc_other_lens.size() - orig_bc_axis);
+                new_bc_op =
+                    make_op("broadcast", {{"axis", new_bc_axis}, {"out_lens", new_other_lens}});
+            }
+            else
+            {
+                new_bc_op = make_op("multibroadcast", {{"out_lens", new_other_lens}});
+            }
+
+            new_other = m.insert_instruction(dot, new_bc_op, other);
+        }
+
+        instruction_ref new_dot;
+        if(flipped)
+        {
+            new_dot = m.insert_instruction(dot, make_op("dot"), new_other, inp);
+        }
+        else
+        {
+            new_dot = m.insert_instruction(dot, make_op("dot"), inp, new_other);
+        }
+        m.replace_instruction(
+            dot, make_op("reshape", {{"dims", dot->get_shape().lens()}}), new_dot);
     }
 };

@@ -1081,7 +1141,7 @@ void simplify_reshapes::apply(module& m) const
             find_broadcast_transpose{},
             find_slice_transpose{},
             find_unary_shape_transforms{},
-            find_reshape_reshape_dot{},
+            find_reshape_dot{},
             find_scalar_multibroadcast_reshape_or_transpose{});
     dead_code_elimination{}.apply(m);
 }
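The invariant find_reshape_dot relies on: the reshape may fold or unfold batch dimensions, but the gemm contraction dimension must survive unchanged, namely the last dimension for a dot LHS (dot_axis = 1) and the second-to-last for a dot RHS (dot_axis = 2). A shape-level sketch of that check (standalone C++; names are illustrative, not MIGraphX APIs):

#include <cstddef>
#include <vector>

using dims = std::vector<std::size_t>;

// Mirrors is_valid_reshape: dot_axis counts from the back of the shape.
bool gemm_axis_preserved(const dims& inp, const dims& rsp, std::size_t dot_axis)
{
    return inp.size() >= dot_axis and
           rsp[rsp.size() - dot_axis] == inp[inp.size() - dot_axis];
}
// e.g. inp = {2, 3, 4, 5} reshaped to rsp = {6, 4, 5} feeding a dot LHS
// (dot_axis = 1): 5 == 5 holds, so the dot can run on {2, 3, 4, 5} with the
// other operand broadcast to {2, 3, 5, N}, and the {2, 3, 4, N} result is
// reshaped back to the expected {6, 4, N}.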
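For a 1-D non-reshape operand, the pass recovers the original broadcast axis from the strides (the single non-zero stride) and re-derives it against the longer batch shape by keeping its distance from the trailing gemm dims. A small worked example of that index arithmetic (standalone C++; assumptions as commented):

#include <cstddef>

// Mirrors the new_bc_axis computation: keep the broadcast axis at the same
// distance from the back when the batch rank grows.
std::size_t remap_broadcast_axis(std::size_t orig_bc_axis,
                                 std::size_t old_rank, // rank of the broadcast the dot saw
                                 std::size_t new_rank) // rank after the rewrite
{
    return new_rank - (old_rank - orig_bc_axis);
}
// e.g. a length-N weight broadcast on axis 2 of a rank-3 {6, 5, N} operand,
// rebuilt against a rank-4 {2, 3, 5, N} shape: 4 - (3 - 2) = 3, still the
// N dimension counted from the back.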
2 changes: 1 addition & 1 deletion test/onnx/.onnxrt-commit
@@ -1 +1 @@
-33a68d221f28bd8d412f2e9188e50bac8a255b71
+35697d242111b20ac2160197ff9fe90ee0ca63bc
