Skip to content

Commit

Permalink
[Codegen][GPU] Change iree_gpu.shuffle_tensor to take a region for th…
Browse files Browse the repository at this point in the history
…e read (iree-org#17425)

This simplifies the number of fields required for the ops and enables
including reshaping of the intermediate allocation without needing to
add fields to the op ad infinitum.

This change is also motivated by an issue with allocation reuse that
naturally arises from hoisting static allocations out of loops. In short,
such hoisting (and bufferization) requires a synchronization not only on
the write to the allocation, but also after all reads have completed,
because the same allocation is reused for each iteration of the loop. This
dependency is not modeled with SSA before or after bufferization, meaning
the fact that this operation represents both the write and the reads is
saving us with some spooky action at a distance. This missing dependency
needs more investigation in the future, but it is unclear to me at the
moment how to navigate bufferization and vectorization. I suspect we will
end up wanting a vectorization pattern for this operation, but I'm leaving
that as a TODO for now.

This also makes the intermediate type a tensor again because we were
just using `bufferization.to_memref` before to get back to a tensor and
the generated IR was unnatural. Perhaps worth another look in the future
as well.
  • Loading branch information
qedawkins authored May 20, 2024
1 parent dc61fcc commit e0f3c05
Show file tree
Hide file tree
Showing 9 changed files with 223 additions and 175 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -408,35 +408,30 @@ struct LowerShuffleTensor
PatternRewriter &rewriter) const final {
Location loc = shuffleOp.getLoc();

MemRefType allocType = shuffleOp.getSharedAllocType();
auto tensorType =
RankedTensorType::get(allocType.getShape(), allocType.getElementType());
Value tensorAlloc = rewriter.create<bufferization::ToTensorOp>(
loc, tensorType, shuffleOp.getSharedAlloc(), /*restrict=*/true,
/*writeable=*/true);

// Step 1. Insert the source slice into the intermediate tensor.
SmallVector<OpFoldResult, 4> sourceOffsets =
shuffleOp.getMixedSourceOffsets();
SmallVector<OpFoldResult, 4> sourceSizes = shuffleOp.getMixedSourceSizes();
SmallVector<OpFoldResult, 4> sourceStrides =
shuffleOp.getMixedSourceStrides();
SmallVector<OpFoldResult, 4> sourceOffsets = shuffleOp.getMixedOffsets();
SmallVector<OpFoldResult, 4> sourceSizes = shuffleOp.getMixedSizes();
SmallVector<OpFoldResult, 4> sourceStrides = shuffleOp.getMixedStrides();
Value insertedSlice = rewriter.create<tensor::InsertSliceOp>(
loc, shuffleOp.getSource(), tensorAlloc, sourceOffsets, sourceSizes,
sourceStrides);
loc, shuffleOp.getSource(), shuffleOp.getDest(), sourceOffsets,
sourceSizes, sourceStrides);

// Step 2. Synchronize the workers.
rewriter.create<gpu::BarrierOp>(loc);

// Step 3. Extract the result slice.
SmallVector<OpFoldResult, 4> resultOffsets =
shuffleOp.getMixedResultOffsets();
SmallVector<OpFoldResult, 4> resultSizes = shuffleOp.getMixedResultSizes();
SmallVector<OpFoldResult, 4> resultStrides =
shuffleOp.getMixedResultStrides();
rewriter.replaceOpWithNewOp<tensor::ExtractSliceOp>(
shuffleOp, shuffleOp.getType(), insertedSlice, resultOffsets,
resultSizes, resultStrides);
auto terminator = shuffleOp.getBody()->getTerminator();
Value replacement = terminator->getOperand(0);
rewriter.inlineBlockBefore(shuffleOp.getBody(), shuffleOp, {insertedSlice});
rewriter.replaceAllUsesWith(shuffleOp.getResult(), replacement);
rewriter.setInsertionPointAfterValue(replacement);

// Step 2. Synchronize the workers again after reading the shuffled values.
// TODO: This barrier is an approximation for what we expect bufferization +
// vectorization to produce. There is no guarantee that this barrier is
// adhered to, but the way that bufferization and vectorization works
// is unfriendly towards barrier-like constructs.
rewriter.create<gpu::BarrierOp>(loc);
rewriter.eraseOp(terminator);
return success();
}
};
Expand Down Expand Up @@ -877,35 +872,27 @@ LogicalResult compareWorkerCountsAndTypes(scf::ForallOp producer,
return success();
}

Value getReplacementSlice(RewriterBase &rewriter, Location loc,
tensor::ParallelInsertSliceOp parallelInsert,
tensor::ExtractSliceOp extractSlice,
std::optional<Attribute> addressSpace) {
RankedTensorType destTensorType = parallelInsert.getDestType();
MemRefType allocType =
addressSpace ? MemRefType::get(destTensorType.getShape(),
destTensorType.getElementType(),
MemRefLayoutAttrInterface{}, *addressSpace)
: MemRefType::get(destTensorType.getShape(),
destTensorType.getElementType());
Value dest = Value();
if (auto empty = parallelInsert.getDest().getDefiningOp<tensor::EmptyOp>()) {
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(empty);
dest = rewriter.create<memref::AllocOp>(loc, allocType,
empty.getDynamicSizes());
} else {
dest = rewriter.create<bufferization::ToMemrefOp>(loc, allocType,
parallelInsert.getDest());
}
return rewriter.create<IREE::GPU::ShuffleTensorOp>(
void replaceExtractSlice(RewriterBase &rewriter, Location loc,
tensor::ParallelInsertSliceOp parallelInsert,
tensor::ExtractSliceOp extractSlice) {
OpBuilder::InsertionGuard g(rewriter);
auto shuffleOp = rewriter.create<IREE::GPU::ShuffleTensorOp>(
loc, extractSlice.getType(), parallelInsert.getSource(),
parallelInsert.getOffsets(), parallelInsert.getSizes(),
parallelInsert.getStrides(), parallelInsert.getStaticOffsets(),
parallelInsert.getStaticSizes(), parallelInsert.getStaticStrides(), dest,
extractSlice.getOffsets(), extractSlice.getSizes(),
extractSlice.getStrides(), extractSlice.getStaticOffsets(),
extractSlice.getStaticSizes(), extractSlice.getStaticStrides());
parallelInsert.getStaticSizes(), parallelInsert.getStaticStrides(),
parallelInsert.getDest());
Region *region = &shuffleOp.getRegion();
rewriter.createBlock(region, region->end(),
ArrayRef<Type>{parallelInsert.getDestType()},
ArrayRef<Location>{loc});
rewriter.setInsertionPointToStart(shuffleOp.getBody());
auto terminator =
rewriter.create<IREE::GPU::YieldOp>(loc, extractSlice.getResult());
rewriter.moveOpBefore(extractSlice, terminator);
extractSlice.getSourceMutable().assign(shuffleOp.getBody()->getArgument(0));
rewriter.replaceAllUsesExcept(extractSlice.getResult(), shuffleOp,
terminator);
}

LogicalResult fuseForallIntoSlice(RewriterBase &rewriter,
Expand Down Expand Up @@ -975,12 +962,9 @@ LogicalResult fuseForallIntoSlice(RewriterBase &rewriter,
auto parallelInsert =
cast<tensor::ParallelInsertSliceOp>(*terminator.getYieldingOps().begin());

Value replacementSlice =
getReplacementSlice(rewriter, loc, parallelInsert, slice, addressSpace);
rewriter.replaceAllUsesWith(slice, replacementSlice);
replaceExtractSlice(rewriter, loc, parallelInsert, slice);

rewriter.eraseOp(parallelInsert);
rewriter.eraseOp(slice);
rewriter.eraseOp(terminator);
rewriter.eraseOp(producer);
return success();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ module attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
%loops = transform.structured.match ops{["scf.forall"]} in %root : (!transform.any_op) -> !transform.any_op
%producer, %consumer = transform.split_handle %loops : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.iree.fuse_forall %producer into %consumer {address_space = #gpu.address_space<workgroup>} : (!transform.any_op, !transform.any_op) -> (!transform.any_op)
transform.iree.fuse_forall %producer into %consumer : (!transform.any_op, !transform.any_op) -> (!transform.any_op)
transform.yield
}
}
Expand All @@ -45,7 +45,6 @@ module attributes { transform.with_named_sequence } {
// CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<128x128xf32>

// CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() : tensor<128x128xf32>
// CHECK-DAG: %[[ALLOC:.+]] = memref.alloc() : memref<128x128xf32, #gpu.address_space<workgroup>>
// CHECK: scf.forall (%[[IDX:.+]], %[[IDY:.+]]) in (8, 8) shared_outs(%[[INIT:.+]] = %[[EMPTY]]) -> (tensor<128x128xf32>) {
// CHECK-DAG: %[[OUTID0:.+]] = affine.apply #[[$MAP]](%[[IDX]])
// CHECK-DAG: %[[OUTID1:.+]] = affine.apply #[[$MAP]](%[[IDY]])
Expand All @@ -55,9 +54,11 @@ module attributes { transform.with_named_sequence } {
// CHECK: %[[INSLICE0:.+]] = tensor.extract_slice %[[ARG0]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1] : tensor<128x128xf32> to tensor<2x128xf32>
// CHECK: %[[INSLICE1:.+]] = tensor.extract_slice %[[EMPTY]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1] : tensor<128x128xf32> to tensor<2x128xf32>
// CHECK: %[[COPY:.+]] = linalg.copy ins(%[[INSLICE0]] : tensor<2x128xf32>) outs(%[[INSLICE1]] : tensor<2x128xf32>) -> tensor<2x128xf32>
// CHECK: %[[SHUFFLE:.+]] = iree_gpu.shuffle_tensor %[[COPY]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1]
// CHECK-SAME: to %[[ALLOC]] [%[[OUTID0]], %[[OUTID1]]] [16, 16] [1, 1]
// CHECK-SAME: : tensor<2x128xf32> -> memref<128x128xf32, #gpu.address_space<workgroup>> -> tensor<16x16xf32>
// CHECK: %[[SHUFFLE:.+]] = iree_gpu.shuffle_tensor %[[COPY]][%[[INID0]], %[[IDS]]#1] [2, 128] [1, 1] to %[[EMPTY]]
// CHECK: ^bb0(%[[INTERMEDIATE:.+]]: tensor<128x128xf32>):
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[INTERMEDIATE]][%[[OUTID0]], %[[OUTID1]]] [16, 16] [1, 1] : tensor<128x128xf32> to tensor<16x16xf32>
// CHECK: iree_gpu.yield %[[SLICE]]
// CHECK: } : tensor<2x128xf32> -> tensor<128x128xf32> -> tensor<16x16xf32>
// CHECK: %[[OUTSLICE:.+]] = tensor.extract_slice %[[INIT]][%[[OUTID0]], %[[OUTID1]]] [16, 16] [1, 1] : tensor<128x128xf32> to tensor<16x16xf32>
// CHECK: %[[MM:.+]] = linalg.matmul ins(%[[SHUFFLE]], %[[SHUFFLE]] : tensor<16x16xf32>, tensor<16x16xf32>)
// CHECK-SAME: outs(%[[OUTSLICE]] : tensor<16x16xf32>) -> tensor<16x16xf32>
Expand Down Expand Up @@ -113,7 +114,6 @@ module attributes { transform.with_named_sequence } {
// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]: tensor<128x128xf32>

// CHECK: scf.forall (%[[IDX:.+]], %[[IDY:.+]]) in (8, 8) shared_outs(%[[INIT:.+]] = %[[ARG1]]) -> (tensor<128x128xf32>) {
// CHECK: %[[ALLOC:.+]] = bufferization.to_memref %[[ARG1]]
// CHECK: %[[SHUFFLE:.+]] = iree_gpu.shuffle_tensor %{{.*}} to %[[ALLOC]]
// CHECK-SAME: : tensor<2x128xf32> -> memref<128x128xf32> -> tensor<16x16xf32>
// CHECK: %[[SHUFFLE:.+]] = iree_gpu.shuffle_tensor %{{.*}} to %[[ARG1]]
// CHECK: } : tensor<2x128xf32> -> tensor<128x128xf32> -> tensor<16x16xf32>
// CHECK: } {mapping = [#gpu.warp<y>, #gpu.warp<x>]}
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
// RUN: iree-opt %s -iree-transform-dialect-interpreter -transform-dialect-drop-schedule --split-input-file | FileCheck %s

func.func @shuffle_tensor(%init: memref<6x6xf32>, %arg0: tensor<2x3xf32>, %x: index) -> tensor<3x2xf32> {
%0 = iree_gpu.shuffle_tensor %arg0[%x, 0] [2, 3] [1, 1] to %init[0, %x] [3, 2] [1, 1] : tensor<2x3xf32> -> memref<6x6xf32> -> tensor<3x2xf32>
func.func @shuffle_tensor(%init: tensor<6x6xf32>, %source: tensor<2x3xf32>, %x: index) -> tensor<3x2xf32> {
%0 = iree_gpu.shuffle_tensor %source[%x, 0] [2, 3] [1, 1] to %init {
^bb0(%intermediate: tensor<6x6xf32>):
%slice = tensor.extract_slice %intermediate[0, %x] [3, 2] [1, 1] : tensor<6x6xf32> to tensor<3x2xf32>
iree_gpu.yield %slice : tensor<3x2xf32>
} : tensor<2x3xf32> -> tensor<6x6xf32> -> tensor<3x2xf32>
return %0 : tensor<3x2xf32>
}

Expand All @@ -16,22 +20,24 @@ module attributes { transform.with_named_sequence } {
}

// CHECK-LABEL: func @shuffle_tensor
// CHECK-SAME: %[[INIT:[A-Za-z0-9]+]]: memref<6x6xf32>
// CHECK-SAME: %[[INIT:[A-Za-z0-9]+]]: tensor<6x6xf32>
// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]: tensor<2x3xf32>
// CHECK-SAME: %[[X:[A-Za-z0-9]+]]: index

// CHECK: %[[TENSOR:.+]] = bufferization.to_tensor %[[INIT]]
// CHECK-SAME: restrict
// CHECK-SAME: writable
// CHECK: %[[IN:.+]] = tensor.insert_slice %[[ARG1]] into %[[TENSOR]][%[[X]], 0] [2, 3] [1, 1] : tensor<2x3xf32> into tensor<6x6xf32>
// CHECK: %[[IN:.+]] = tensor.insert_slice %[[ARG1]] into %[[INIT]][%[[X]], 0] [2, 3] [1, 1] : tensor<2x3xf32> into tensor<6x6xf32>
// CHECK: gpu.barrier
// CHECK: %[[OUT:.+]] = tensor.extract_slice %[[IN]][0, %[[X]]] [3, 2] [1, 1] : tensor<6x6xf32> to tensor<3x2xf32>
// CHECK: gpu.barrier
// CHECK: return %[[OUT]] : tensor<3x2xf32>

// -----

func.func @rank_reducing_shuffle_tensor(%init: memref<1x6x6xf32>, %arg0: tensor<2x3xf32>, %x: index, %y: index) -> tensor<3x2xf32> {
%0 = iree_gpu.shuffle_tensor %arg0[0, %x, %y] [1, 2, 3] [1, 1, 1] to %init[0, %y, %x] [1, 3, 2] [1, 1, 1] : tensor<2x3xf32> -> memref<1x6x6xf32> -> tensor<3x2xf32>
func.func @rank_reducing_shuffle_tensor(%init: tensor<1x6x6xf32>, %source: tensor<2x3xf32>, %x: index, %y: index) -> tensor<3x2xf32> {
%0 = iree_gpu.shuffle_tensor %source[0, %x, %y] [1, 2, 3] [1, 1, 1] to %init {
^bb0(%intermediate: tensor<1x6x6xf32>):
%slice = tensor.extract_slice %intermediate[0, %y, %x] [1, 3, 2] [1, 1, 1] : tensor<1x6x6xf32> to tensor<3x2xf32>
iree_gpu.yield %slice : tensor<3x2xf32>
} : tensor<2x3xf32> -> tensor<1x6x6xf32> -> tensor<3x2xf32>
return %0 : tensor<3x2xf32>
}

Expand All @@ -46,14 +52,45 @@ module attributes { transform.with_named_sequence } {
}

// CHECK-LABEL: func @rank_reducing_shuffle_tensor
// CHECK-SAME: %[[INIT:[A-Za-z0-9]+]]: memref<1x6x6xf32>
// CHECK-SAME: %[[INIT:[A-Za-z0-9]+]]: tensor<1x6x6xf32>
// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]: tensor<2x3xf32>
// CHECK-SAME: %[[X:[A-Za-z0-9]+]]: index
// CHECK-SAME: %[[Y:[A-Za-z0-9]+]]: index

// CHECK: %[[TENSOR:.+]] = bufferization.to_tensor %[[INIT]]
// CHECK-SAME: restrict
// CHECK-SAME: writable
// CHECK: %[[IN:.+]] = tensor.insert_slice %[[ARG1]] into %[[TENSOR]][0, %[[X]], %[[Y]]] [1, 2, 3] [1, 1, 1] : tensor<2x3xf32> into tensor<1x6x6xf32>
// CHECK: %[[IN:.+]] = tensor.insert_slice %[[ARG1]] into %[[INIT]][0, %[[X]], %[[Y]]] [1, 2, 3] [1, 1, 1] : tensor<2x3xf32> into tensor<1x6x6xf32>
// CHECK: gpu.barrier
// CHECK: %[[OUT:.+]] = tensor.extract_slice %[[IN]][0, %[[Y]], %[[X]]] [1, 3, 2] [1, 1, 1] : tensor<1x6x6xf32> to tensor<3x2xf32>
// CHECK: gpu.barrier
// CHECK: return %[[OUT]]

// -----

func.func @reshape_shuffle_tensor(%init: tensor<12x12xf32>, %source: tensor<2x3xf32>) -> tensor<2x1x3x2xf32> {
%0 = iree_gpu.shuffle_tensor %source[0, 0] [2, 3] [1, 1] to %init {
^bb0(%intermediate: tensor<12x12xf32>):
%expand = tensor.expand_shape %intermediate [[0, 1], [2, 3]] output_shape [4, 3, 3, 4] : tensor<12x12xf32> into tensor<4x3x3x4xf32>
%slice = tensor.extract_slice %expand[0, 0, 0, 0] [2, 1, 3, 2] [1, 1, 1, 1] : tensor<4x3x3x4xf32> to tensor<2x1x3x2xf32>
iree_gpu.yield %slice : tensor<2x1x3x2xf32>
} : tensor<2x3xf32> -> tensor<12x12xf32> -> tensor<2x1x3x2xf32>
return %0 : tensor<2x1x3x2xf32>
}

module attributes { transform.with_named_sequence } {
transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
%func = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.iree.lower_shuffle_tensor
} : !transform.any_op
transform.yield
}
}

// CHECK-LABEL: func @reshape_shuffle_tensor
// CHECK-SAME: %[[INIT:[A-Za-z0-9]+]]: tensor<12x12xf32>
// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]: tensor<2x3xf32>

// CHECK: %[[IN:.+]] = tensor.insert_slice %[[ARG1]] into %[[INIT]][0, 0] [2, 3] [1, 1] : tensor<2x3xf32> into tensor<12x12xf32>
// CHECK: gpu.barrier
// CHECK: %[[EXPAND:.+]] = tensor.expand_shape %[[IN]] {{\[}}[0, 1], [2, 3]{{\]}} output_shape [4, 3, 3, 4] : tensor<12x12xf32> into tensor<4x3x3x4xf32>
// CHECK: tensor.extract_slice %[[EXPAND]][0, 0, 0, 0] [2, 1, 3, 2] [1, 1, 1, 1] : tensor<4x3x3x4xf32> to tensor<2x1x3x2xf32>
// CHECK: gpu.barrier
// CHECK: tensor.extract_slice %[[IN]][0, %[[Y]], %[[X]]] [1, 3, 2] [1, 1, 1] : tensor<1x6x6xf32> to tensor<3x2xf32>
2 changes: 2 additions & 0 deletions compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ iree_td_library(
include = ["*.td"],
),
deps = [
"@llvm-project//mlir:ControlFlowInterfacesTdFiles",
"@llvm-project//mlir:OpBaseTdFiles",
"@llvm-project//mlir:SideEffectInterfacesTdFiles",
],
Expand Down Expand Up @@ -69,6 +70,7 @@ iree_compiler_cc_library(
"//llvm-external-projects/iree-dialects:IREEVectorExtDialect",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:AMDGPUDialect",
"@llvm-project//mlir:ControlFlowInterfaces",
"@llvm-project//mlir:DialectUtils",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LinalgDialect",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ iree_cc_library(
IREEVectorExtDialect
LLVMSupport
MLIRAMDGPUDialect
MLIRControlFlowInterfaces
MLIRIR
MLIRLinalgDialect
MLIRParser
Expand Down
42 changes: 25 additions & 17 deletions compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,38 +25,46 @@ namespace mlir::iree_compiler::IREE::GPU {

LogicalResult ShuffleTensorOp::verify() {
// Get the equivalent tensor type for the alloc to verify against.
MemRefType allocType = getSharedAllocType();
Type allocElementType = allocType.getElementType();
RankedTensorType destType = getDestType();
Type allocElementType = destType.getElementType();
RankedTensorType allocTensorType =
RankedTensorType::get(allocType.getShape(), allocElementType);
RankedTensorType::get(destType.getShape(), allocElementType);

// Verify source type against inferred type. Slice insertion and extraction
// use the same verification logic.
RankedTensorType expectedType = tensor::ExtractSliceOp::inferResultType(
allocTensorType, getMixedSourceOffsets(), getMixedSourceSizes(),
getMixedSourceStrides());
allocTensorType, getMixedOffsets(), getMixedSizes(), getMixedStrides());
SliceVerificationResult result =
isRankReducedType(expectedType, getSourceType());
if (result != SliceVerificationResult::Success) {
return emitError("Invalid source slice type");
}

// Do the same for the resulting tensor type
expectedType = tensor::ExtractSliceOp::inferResultType(
allocTensorType, getMixedResultOffsets(), getMixedResultSizes(),
getMixedResultStrides());
result = isRankReducedType(expectedType, getType());
if (result != SliceVerificationResult::Success) {
return emitError("Invalid result slice type");
}

if (allocElementType != getSourceType().getElementType() ||
allocElementType != getType().getElementType()) {
return emitError(
"Element type mismatch between source, allocation, and result");
return emitError("Element type mismatch between source and destination");
}
return success();
}

/// Verifies the region attached to this op.
///
/// Checks, in order, that the region's first block has exactly one argument,
/// that the argument's type equals the op's destination type, and that the
/// value carried by the terminating yield has the same type as the op's
/// result. Emits a diagnostic and returns failure on the first violation.
LogicalResult ShuffleTensorOp::verifyRegions() {
  auto &region = getRegion();
  // NOTE(review): front() assumes the region is non-empty; presumably the op
  // definition enforces a single-block region -- confirm in the .td file.
  Block &block = region.front();
  if (block.getNumArguments() != 1) {
    return emitError("expected the block to have a single argument");
  }

  if (block.getArgumentTypes()[0] != getDestType()) {
    // The trailing space keeps the streamed type from being glued onto "of"
    // in the emitted message.
    return emitError("expected block to have single argument type of ")
           << getDestType();
  }

  // Ensure that the region yields an element of the right type.
  // NOTE(review): llvm::cast asserts if the terminator is not a GPU::YieldOp;
  // presumably an implicit-terminator trait guarantees this -- confirm.
  auto yieldOp = llvm::cast<GPU::YieldOp>(block.getTerminator());
  if (yieldOp.getValue().getType() != getResult().getType()) {
    return emitOpError("expected yield type to match result type");
  }

  // TODO: Verification of the allocation size in the static case.
  return success();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

// clang-format off
Expand Down
Loading

0 comments on commit e0f3c05

Please sign in to comment.