Skip to content

Commit

Permalink
Fixing execution region result placement. (#19872)
Browse files Browse the repository at this point in the history
This uses transfer ops to place allocations that escape execution
regions. In the future we'll need something more sophisticated
(AffinityAnalysis for the escaped results, etc.), but in simple programs
today, where transfers are used to indicate resource movement, it
correctly picks up the destinations.

This also fixes `flow.tensor.barrier` and `stream.async.barrier` to be
tied ops (as they are metadata-only) and a few issues with affinity
attribute assignment identified while tracking down the placement
affinities.
  • Loading branch information
benvanik authored Feb 4, 2025
1 parent d96a3f0 commit 4fffb0e
Show file tree
Hide file tree
Showing 10 changed files with 309 additions and 121 deletions.
33 changes: 28 additions & 5 deletions compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1828,9 +1828,9 @@ LogicalResult TensorSplatOp::verify() {

LogicalResult TensorCloneOp::verify() {
if (failed(verifyOpDynamicDims(getOperation(), {getOperand()},
getArgumentDims())) ||
getOperandDims())) ||
failed(verifyOpDynamicDims(getOperation(), {getResult()},
getArgumentDims()))) {
getOperandDims()))) {
return failure();
}
return success();
Expand All @@ -1840,17 +1840,40 @@ LogicalResult TensorCloneOp::verify() {
// flow.tensor.barrier
//===----------------------------------------------------------------------===//

LogicalResult TensorBarrierOp::verify() { return success(); }
// Verifies the op by delegating to verifyOpDynamicDims for the operand and
// its associated operand_dims. Succeeds only when that check does not fail.
LogicalResult TensorBarrierOp::verify() {
  auto operandDimsCheck =
      verifyOpDynamicDims(getOperation(), {getOperand()}, getOperandDims());
  if (!failed(operandDimsCheck)) {
    return success();
  }
  return failure();
}

// Returns the base value the result is tied to, resolved by looking through
// the operand with TiedOpInterface::findTiedBaseValue. |resultIndex| is not
// consulted.
Value TensorBarrierOp::getTiedResult(unsigned resultIndex) {
  Value tiedOperand = getOperand();
  return IREE::Util::TiedOpInterface::findTiedBaseValue(tiedOperand);
}

// Returns the operand that |result| is tied to. The op's operand is returned
// regardless of which value is passed in.
Value TensorBarrierOp::getTiedResultOperand(Value result) {
  Value tiedOperand = getOperand();
  return tiedOperand;
}

// Returns the index of the operand tied to the result at |resultIndex|.
// Always reports index 0 (the `operand`); |resultIndex| is not consulted.
::std::optional<unsigned>
TensorBarrierOp::getTiedResultOperandIndex(unsigned resultIndex) {
  constexpr unsigned kOperandIndex = 0;
  return kOperandIndex;
}

// Returns the tied operand indices as a single-element list containing
// index 0 (the `operand`).
SmallVector<int64_t> TensorBarrierOp::getTiedResultOperandIndices() {
  SmallVector<int64_t> tiedIndices;
  tiedIndices.push_back(0); // operand
  return tiedIndices;
}

//===----------------------------------------------------------------------===//
// flow.tensor.transfer
//===----------------------------------------------------------------------===//

LogicalResult TensorTransferOp::verify() {
if (failed(verifyOpDynamicDims(getOperation(), {getOperand()},
getArgumentDims())) ||
getOperandDims())) ||
failed(verifyOpDynamicDims(getOperation(), {getResult()},
getArgumentDims()))) {
getOperandDims()))) {
return failure();
}
return success();
Expand Down
36 changes: 23 additions & 13 deletions compiler/src/iree/compiler/Dialect/Flow/IR/FlowOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1469,14 +1469,14 @@ def FLOW_TensorCloneOp : FLOW_PureOp<"tensor.clone", [

let arguments = (ins
FLOW_Tensor:$operand,
FLOW_ShapeDynamicDims:$argument_dims
FLOW_ShapeDynamicDims:$operand_dims
);
let results = (outs
FLOW_Tensor:$result
);

let assemblyFormat = [{
$operand `:` type($result) (`{` $argument_dims^ `}`)?
$operand `:` type($result) (`{` $operand_dims^ `}`)?
attr-dict-with-keyword
}];

Expand All @@ -1493,8 +1493,8 @@ def FLOW_TensorCloneOp : FLOW_PureOp<"tensor.clone", [
let extraClassDeclaration = [{
bool isHoistableLeafOp() { return false; }

ValueRange getOperandDynamicDims(unsigned idx) { return getArgumentDims(); }
ValueRange getResultDynamicDims(unsigned idx) { return getArgumentDims(); }
ValueRange getOperandDynamicDims(unsigned idx) { return getOperandDims(); }
ValueRange getResultDynamicDims(unsigned idx) { return getOperandDims(); }
}];

let hasVerifier = 1;
Expand All @@ -1506,22 +1506,32 @@ def FLOW_TensorBarrierOp : FLOW_PureOp<"tensor.barrier", [
AllTypesMatch<["operand", "result"]>,
DeclareOpInterfaceMethods<Util_HoistableOpInterface>,
Util_ShapeAwareOp,
DeclareOpInterfaceMethods<Util_TiedOpInterface, [
"getTiedResult",
"getTiedResultOperand",
"getTiedResultOperandIndex",
"getTiedResultOperandIndices",
]>,
]> {
let summary = [{}];
let summary = [{indicates a value that must have a specific affinity}];
let description = [{
Prevents fusion and scheduling of a value across an affinity boundary.
May introduce copy-on-write behavior if the operand value is used as well as
the result and users should try to keep the operand to a single use by this
op.
}];

let arguments = (ins
FLOW_Tensor:$operand,
FLOW_ShapeDynamicDims:$argument_dims,
FLOW_ShapeDynamicDims:$operand_dims,
AnyAttr:$target
);
let results = (outs
FLOW_Tensor:$result
);

let assemblyFormat = [{
$operand `:` type($result) (`{` $argument_dims^ `}`)?
$operand `:` type($result) (`{` $operand_dims^ `}`)?
`on` $target
attr-dict-with-keyword
}];
Expand All @@ -1540,8 +1550,8 @@ def FLOW_TensorBarrierOp : FLOW_PureOp<"tensor.barrier", [
let extraClassDeclaration = [{
bool isHoistableLeafOp() { return false; }

ValueRange getOperandDynamicDims(unsigned idx) { return getArgumentDims(); }
ValueRange getResultDynamicDims(unsigned idx) { return getArgumentDims(); }
ValueRange getOperandDynamicDims(unsigned idx) { return getOperandDims(); }
ValueRange getResultDynamicDims(unsigned idx) { return getOperandDims(); }
}];

let hasVerifier = 1;
Expand All @@ -1564,15 +1574,15 @@ def FLOW_TensorTransferOp : FLOW_PureOp<"tensor.transfer", [

let arguments = (ins
FLOW_Tensor:$operand,
FLOW_ShapeDynamicDims:$argument_dims,
FLOW_ShapeDynamicDims:$operand_dims,
AnyAttr:$target
);
let results = (outs
FLOW_Tensor:$result
);

let assemblyFormat = [{
$operand `:` type($result) (`{` $argument_dims^ `}`)?
$operand `:` type($result) (`{` $operand_dims^ `}`)?
`to` $target
attr-dict-with-keyword
}];
Expand All @@ -1591,8 +1601,8 @@ def FLOW_TensorTransferOp : FLOW_PureOp<"tensor.transfer", [
let extraClassDeclaration = [{
bool isHoistableLeafOp() { return false; }

ValueRange getOperandDynamicDims(unsigned idx) { return getArgumentDims(); }
ValueRange getResultDynamicDims(unsigned idx) { return getArgumentDims(); }
ValueRange getOperandDynamicDims(unsigned idx) { return getOperandDims(); }
ValueRange getResultDynamicDims(unsigned idx) { return getOperandDims(); }
}];

let hasVerifier = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ struct ConvertTensorCloneOp
auto unknownType = rewriter.getType<IREE::Stream::ResourceType>();
auto cloneOp = rewriter.create<IREE::Stream::TensorCloneOp>(
op.getLoc(), unknownType, operand.resource, op.getOperand().getType(),
op.getArgumentDims(), operand.resourceSize, op.getResult().getType(),
flattenValues(adaptor.getArgumentDims()), operand.resourceSize,
op.getOperandDims(), operand.resourceSize, op.getResult().getType(),
flattenValues(adaptor.getOperandDims()), operand.resourceSize,
executionAffinityAttr);
rewriter.replaceOpWithMultiple(op, {{cloneOp, operand.resourceSize}});
return success();
Expand All @@ -249,7 +249,7 @@ struct ConvertTensorBarrierOp
auto barrierOp = rewriter.create<IREE::Stream::AsyncBarrierOp>(
op.getLoc(), operand.resource.getType(), operand.resource,
operand.resourceSize,
/*affinity=*/operand.affinity);
/*affinity=*/executionAffinityAttr);
rewriter.replaceOpWithMultiple(op, {{barrierOp, operand.resourceSize}});
return success();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,14 @@ util.func public @tensorSplat(%value: i8, %dim0: index) -> tensor<?x128xi8> {
util.global private @device : !hal.device

// CHECK-LABEL: @tensorBarrierDispatch
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[DIM0:.+]]: index, %[[DIM1:.+]]: index)
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM0:.+]]: index)
util.func public @tensorBarrierDispatch(%input: tensor<?x128xi8>, %dim0: index) -> tensor<?x128xi8> {
// CHECK: %[[BARRIER:.+]] = stream.async.barrier %[[INPUT]] : !stream.resource<*>{%[[DIM0]]} -> !stream.resource<*>
// CHECK: %[[BARRIER:.+]] = stream.async.barrier on(#hal.device.affinity<@device>) %[[INPUT]] : !stream.resource<*>{%[[INPUT_SIZE]]}
%barrier = flow.tensor.barrier %input : tensor<?x128xi8>{%dim0} on #hal.device.affinity<@device>
// CHECK: %[[SIZE:.+]] = stream.tensor.sizeof on(#hal.device.affinity<@device>) tensor<?x128xi8>{%arg2} : index
// CHECK: %[[RESULT:.+]] = stream.tensor.dispatch on(#hal.device.affinity<@device>) @ex::@entry(%[[BARRIER]])
// CHECK: %[[RESULT_SIZE:.+]] = stream.tensor.sizeof tensor<?x128xi8>{%[[DIM0]]} : index
// CHECK: %[[RESULT:.+]] = stream.tensor.dispatch @ex::@entry(%[[BARRIER]])
%0 = flow.dispatch @ex::@entry(%barrier) : (tensor<?x128xi8>{%dim0}) -> tensor<?x128xi8>{%dim0}
// CHECK: util.return %[[RESULT]], %[[SIZE]]
// CHECK: util.return %[[RESULT]], %[[RESULT_SIZE]]
util.return %0 : tensor<?x128xi8>
}

Expand All @@ -170,7 +170,7 @@ util.global private @device : !hal.device
// CHECK-LABEL: @tensorBarrier
// CHECK-SAME: (%[[INPUT:.+]]: !stream.resource<*>, %[[INPUT_SIZE:.+]]: index, %[[DIM0:.+]]: index)
util.func public @tensorBarrier(%input: tensor<?x128xi8>, %dim0: index) -> tensor<?x128xi8> {
// CHECK: %[[TRANSFER:.+]] = stream.async.barrier %[[INPUT]] : !stream.resource<*>{%[[INPUT_SIZE]]} -> !stream.resource<*>
// CHECK: %[[TRANSFER:.+]] = stream.async.barrier on(#hal.device.affinity<@device>) %[[INPUT]] : !stream.resource<*>{%[[INPUT_SIZE]]}
%transfer = flow.tensor.barrier %input : tensor<?x128xi8>{%dim0} on #hal.device.affinity<@device>
// CHECK: util.return %[[TRANSFER]], %[[INPUT_SIZE]]
util.return %transfer : tensor<?x128xi8>
Expand Down
13 changes: 13 additions & 0 deletions compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2469,6 +2469,19 @@ bool AsyncBarrierOp::isMetadata() { return true; }

// Performs no op-specific checks; verification unconditionally succeeds.
LogicalResult AsyncBarrierOp::verify() {
  return success();
}

// Returns the base value the result is tied to, resolved by looking through
// the source with TiedOpInterface::findTiedBaseValue. |resultIndex| is not
// consulted.
Value AsyncBarrierOp::getTiedResult(unsigned resultIndex) {
  Value tiedSource = getSource();
  return IREE::Util::TiedOpInterface::findTiedBaseValue(tiedSource);
}

// Returns the index of the operand tied to the result at |resultIndex|.
// Always reports index 0 (the `source`); |resultIndex| is not consulted.
::std::optional<unsigned>
AsyncBarrierOp::getTiedResultOperandIndex(unsigned resultIndex) {
  constexpr unsigned kSourceIndex = 0;
  return kSourceIndex;
}

// Returns the tied operand indices as a single-element list containing
// index 0 (the `source`).
SmallVector<int64_t> AsyncBarrierOp::getTiedResultOperandIndices() {
  SmallVector<int64_t> tiedIndices;
  tiedIndices.push_back(0); // source
  return tiedIndices;
}

//===----------------------------------------------------------------------===//
// stream.async.transfer
//===----------------------------------------------------------------------===//
Expand Down
16 changes: 12 additions & 4 deletions compiler/src/iree/compiler/Dialect/Stream/IR/StreamOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -2291,15 +2291,25 @@ def Stream_AsyncCollectiveOp : Stream_Op<"async.collective", [
}

def Stream_AsyncBarrierOp : Stream_Op<"async.barrier", [
AllTypesMatch<["source", "result"]>,
Stream_AffinityOp,
Stream_AsyncPhaseOp,
DeclareOpInterfaceMethods<Stream_StreamableOp, [
"isMetadata",
]>,
Util_SizeAwareOp,
DeclareOpInterfaceMethods<Util_TiedOpInterface, [
"getTiedResult",
"getTiedResultOperandIndex",
"getTiedResultOperandIndices",
]>,
]> {
let summary = [{ }];
let summary = [{indicates a value that must have a specific affinity}];
let description = [{
Prevents fusion and scheduling of a value across an affinity boundary.
May introduce copy-on-write behavior if the operand value is used as well as
the result and users should try to keep the operand to a single use by this
op.
}];

let arguments = (ins
Expand All @@ -2318,11 +2328,9 @@ def Stream_AsyncBarrierOp : Stream_Op<"async.barrier", [
);

let assemblyFormat = [{
(`on` `(` $affinity^ `)`)?
$source `:` type($source)
`` `{` $size `}`
(`from` `(` $affinity^ `)`)?
`->`
type($result)
attr-dict-with-keyword
}];

Expand Down
Loading

0 comments on commit 4fffb0e

Please sign in to comment.