Skip to content

Commit

Permalink
support async collective op execution
Browse files Browse the repository at this point in the history
  • Loading branch information
eedalong committed Mar 14, 2024
1 parent f160eb2 commit 91ed00f
Showing 1 changed file with 51 additions and 9 deletions.
60 changes: 51 additions & 9 deletions tao_compiler/mlir/disc/transforms/mhlo_decomp_rewriters.cc
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,23 @@ LogicalResult SliceOpConvert::matchAndRewrite(mhlo::SliceOp op,
}
} // namespace
namespace {

// Reports whether the given collective op should be executed asynchronously.
//
// The decision is driven purely by environment variables:
//   - mhlo::AllReduceOp -> ENABLE_ASYNC_ALL_REDUCE
//   - mhlo::AllGatherOp -> ENABLE_ASYNC_ALL_GATHER
// Async mode is enabled only when the matching variable is set to exactly
// "true" or "True". An unset variable, any other value, or any other op kind
// yields false. The environment is re-read on every call.
//
// NOTE(review): the parameter type `mhlo::mhloOp` is kept from the original
// signature; confirm that this type actually exists -- `Operation*` may have
// been intended, since the body only needs `llvm::isa<>` on the op.
bool IsAsyncCollective(mhlo::mhloOp op) {
  // Select the controlling environment variable from the op kind.
  const char* env_name = nullptr;
  if (llvm::isa<mhlo::AllReduceOp>(op)) {
    env_name = "ENABLE_ASYNC_ALL_REDUCE";
  } else if (llvm::isa<mhlo::AllGatherOp>(op)) {
    env_name = "ENABLE_ASYNC_ALL_GATHER";
  }
  if (env_name == nullptr) {
    // Not a collective op that supports async execution.
    return false;
  }

  const char* env_p = std::getenv(env_name);
  if (env_p == nullptr) {
    // Variable not set: async execution stays disabled by default.
    return false;
  }
  return std::strcmp(env_p, "true") == 0 || std::strcmp(env_p, "True") == 0;
}




enum ReductionKind {
ALL_REDUCE_SUM,
ALL_REDUCE_PRODUCT,
Expand Down Expand Up @@ -192,6 +209,9 @@ struct CollectiveOpConverter : public OpRewritePattern<mhlo::AllReduceOp> {
if (!reductionKind) {
return failure();
}

bool is_async = IsAsyncCollective(op);

for (int i = 0; i < op->getOperands().size(); ++i) {
// no need to call the all_reduce op if its result has no consumer
if (op->getResult(i).getUsers().empty()) {
Expand All @@ -206,19 +226,41 @@ struct CollectiveOpConverter : public OpRewritePattern<mhlo::AllReduceOp> {
op->setAttr("output_layouts", rewriter.getStringAttr("*"));
op->setAttr("expected_input_layouts", rewriter.getStringAttr("*"));
op->setAttr("expected_output_layouts", rewriter.getStringAttr("*"));
SmallVector<NamedAttribute> newAttrs;
newAttrs.push_back(
NamedAttribute(rewriter.getStringAttr("reduction_kind"),
rewriter.getStringAttr(reductionKind.value())));

auto newCustomAttrs = DictionaryAttr::get(op->getContext(), newAttrs);

op->setAttr("custom_attrs", newCustomAttrs);
SmallVector<NamedAttribute> attrs;
attrs.push_back(
NamedAttribute(rewriter.getStringAttr("reduction_kind"),
rewriter.getStringAttr(reductionKind.value())),
NamedAttribute(rewriter.getStringAttr("is_async"),
rewriter.getBoolAttr(is_async))
);
auto customAttrs = DictionaryAttr::get(op->getContext(), attrs);
op->setAttr("custom_attrs", customAttrs);

auto newOutput = rewriter.create<mhlo_disc::CustomCallV2Op>(
auto reduce_op = rewriter.create<mhlo_disc::CustomCallV2Op>(
op->getLoc(), op->getResults()[i].getType(), op->getOperands()[i],
op->getAttrs());
newOutputs.push_back(newOutput.getResult(0));

if(is_async) {
int64_t async_pair_token = reinterpret_cast<int64_t>(reduce_op.getOperation())
attrs.push_back(
NamedAttribute(rewriter.getStringAttr("async_pair_token"),
rewriter.getIntegerAttr(rewriter.getIntegerType(64), async_pair_token))
);
auto newCustomAttrs = DictionaryAttr::get(op->getContext(), attrs);
reduce_op->setAttr("custom_attrs", newCustomAttrs);
}

if(is_async) {
// Insert CollectiveDoneOp
auto collective_done_op = rewriter.create<mhlo_disc::CustomCallV2Op>(
reduce_op->getLoc(), reduce_op->getResults()[0].getType(), reduce_op->getResults()[0],
reduce_op->getAttrs());
collective_done_op->setAttr("call_target_name", rewriter.getStringAttr("ral_async_collective_done"));
newOutputs.push_back(collective_done_op.getResult(0));
} else {
newOutputs.push_back(reduce_op.getResult(0));
}
}
rewriter.replaceOp(op, newOutputs);
return success();
Expand Down

0 comments on commit 91ed00f

Please sign in to comment.