Eliminate compiler warnings (gcc11)
fschlimb authored and silee2 committed Dec 13, 2023
1 parent 45301cf commit 180dc90
Showing 8 changed files with 169 additions and 171 deletions.
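Most of the changes below follow a few recurring idioms for silencing gcc 11 diagnostics under -Wall -Wextra: casting an unsigned size() to a signed type before comparing it with a signed value, marking intentionally unused helpers and locals with [[maybe_unused]], and discarding unneeded walk results with a (void) cast. A minimal standalone sketch of the signed/unsigned-comparison fix; the file, function, and variable names here are illustrative and not from this repository:

// sign_compare_demo.cpp -- compile with: g++-11 -Wall -Wextra -c sign_compare_demo.cpp
#include <cassert>
#include <cstddef>
#include <vector>

void checkOffsets(const std::vector<int> &offsets, std::ptrdiff_t dynamicDims) {
  // offsets.size() is std::size_t (unsigned) while dynamicDims is signed;
  // comparing them directly triggers -Wsign-compare under gcc 11, so the
  // size is cast to the signed type first, as in the XeGPUOps.td hunk below.
  assert((std::ptrdiff_t)offsets.size() == dynamicDims);
}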
2 changes: 1 addition & 1 deletion include/imex/Dialect/XeGPU/IR/XeGPUOps.td
@@ -112,7 +112,7 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe
shape.size() == strides.size()
)
);
assert(offsets.size() == dynamicDims);
assert((std::ptrdiff_t)offsets.size() == dynamicDims);

$_state.addOperands(source);
$_state.addOperands(offsets);
3 changes: 2 additions & 1 deletion include/imex/Utils/DebugUtils.h
@@ -8,7 +8,8 @@
#include <fstream>
#include <string>

static std::string getValueAsString(mlir::Value op, bool asOperand = false) {
[[maybe_unused]] static std::string getValueAsString(mlir::Value op,
bool asOperand = false) {
std::string buf;
buf.clear();
llvm::raw_string_ostream os(buf);
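A static function defined in a header, such as getValueAsString above, is emitted separately in every translation unit that includes it, and gcc 11 reports -Wunused-function for each unit that never calls it; marking the definition [[maybe_unused]] suppresses the warning without changing behavior. A self-contained sketch of the idiom, with a made-up header and helper name:

// debug_helpers.h -- illustrative header, not part of the repository
#pragma once
#include <string>

// A static function defined in a header is duplicated in every translation
// unit that includes it; units that never call it trigger -Wunused-function.
// The attribute marks the helper as intentionally optional.
[[maybe_unused]] static std::string formatForDebug(int value) {
  return "value=" + std::to_string(value);
}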
2 changes: 1 addition & 1 deletion lib/Conversion/XeGPUToSPIRV/XeGPUToSPIRV.cpp
@@ -1484,7 +1484,7 @@ unsigned getElementPerWI(imex::xegpu::TensorDescType tDescType) {
auto wiLayout = sgMap.getWiLayout();
auto wiData = sgMap.getWiData();
unsigned elemPerWI = 1;
for (size_t i = 0; i < wiData.size(); i++) {
for (int64_t i = 0; i < wiData.size(); i++) {
if (wiData[i] != 1)
llvm_unreachable("wi_data must be 1 for all dimension for "
"JointMatrix lowering");
4 changes: 2 additions & 2 deletions lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp
@@ -108,7 +108,7 @@ struct SgPrefetchTileOpPattern
return mlir::failure();
auto shape = tileTy.getShape();

if (shape[0] * shape[1] != tiles.size()) {
if (shape[0] * shape[1] != (int64_t)tiles.size()) {
op.emitOpError("Failed to lower LoadTileOp because shape[0] * shape[1] "
"!= sources.size().");
return mlir::failure();
@@ -153,7 +153,7 @@ struct SgLoadTileOpPattern
auto shape = resultTy.getShape();
auto sources = adaptor.getSource();

if (shape[0] * shape[1] != sources.size()) {
if (shape[0] * shape[1] != (int64_t)sources.size()) {
op.emitOpError("Failed to lower LoadTileOp because shape[0] * shape[1] "
"!= sources.size().");
return mlir::failure();
14 changes: 8 additions & 6 deletions lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -237,7 +237,8 @@ static bool verifyAndInferShape(std::vector<int64_t> &shape,
auto sgData = wgMap.getSgData();
auto sgLayout = wgMap.getSgLayout();

if (shape.size() != sgData.size() || shape.size() != sgLayout.size())
if ((int64_t)shape.size() != sgData.size() ||
(int64_t)shape.size() != sgLayout.size())
return false;

for (size_t i = 0; i < shape.size(); i++) {
@@ -254,11 +255,12 @@ auto wiLayout = sgMap.getWiLayout();
auto wiLayout = sgMap.getWiLayout();
auto wiData = sgMap.getWiData();

if (blockSize && shape.size() != blockSize.size()) {
if (blockSize && (int64_t)shape.size() != blockSize.size()) {
return false;
}

if (shape.size() != wiData.size() || shape.size() != wiLayout.size()) {
if ((int64_t)shape.size() != wiData.size() ||
(int64_t)shape.size() != wiLayout.size()) {
return false;
}

@@ -840,7 +842,7 @@ ::mlir::ParseResult StoreNDOp::parse(::mlir::OpAsmParser &parser,

void StoreNDOp::print(::mlir::OpAsmPrinter &printer) {
auto mode = getMode();
bool printSep = false;
[[maybe_unused]] bool printSep = false;
auto printDefaults = printDefaultValues();
auto numAttrs = (*this)->getAttrs().size();

@@ -997,7 +999,7 @@ ::mlir::ParseResult PrefetchNDOp::parse(::mlir::OpAsmParser &parser,

void PrefetchNDOp::print(::mlir::OpAsmPrinter &printer) {
auto mode = getMode();
bool printSep = false;
[[maybe_unused]] bool printSep = false;
auto printDefaults = printDefaultValues();
auto numAttrs = (*this)->getAttrs().size();
printer << ' ';
@@ -1537,7 +1539,7 @@ ::mlir::LogicalResult UpdateOffsetOp::verify() {

::mlir::LogicalResult UpdateNDOffsetOp::verify() {
// number of offsets specified must match the rank of the tensor descriptor
if (getTensorDesc().getType().getRank() != getOffsets().size()) {
if (getTensorDesc().getType().getRank() != (int64_t)getOffsets().size()) {
return emitOpError("Invalid number of offsets.");
}
return ::mlir::success();
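When a local such as printSep is written but never read on every build path, gcc 11 emits -Wunused-but-set-variable (or -Wunused-variable if it is never touched at all), and [[maybe_unused]] documents that the variable is kept on purpose. A small sketch under assumed names; the assert/NDEBUG situation is only one illustrative reason such a variable can end up unused:

#include <cassert>

int describeMode(int mode) {
  // The flag is only read inside assert(), so an NDEBUG build would see it
  // as set but never used; the attribute keeps the variable and silences
  // the warning in both configurations.
  [[maybe_unused]] bool isValidMode = (mode >= 0 && mode < 4);
  assert(isValidMode && "unexpected mode");
  return mode * 2;
}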
236 changes: 116 additions & 120 deletions lib/Transforms/BF16ToGPU.cpp
@@ -34,140 +34,136 @@ struct BF16ToGPUPass : public BF16ToGPUBase<BF16ToGPUPass> {
auto mod = getOperation();
SymbolTable symbolTable(mod);
mlir::OpBuilder builder(mod);
auto &aliases = getAnalysis<mlir::BufferViewFlowAnalysis>();
// Part 1: gpu::GPUFuncOp
WalkResult result1 =
mod.walk<WalkOrder::PreOrder>([&](gpu::GPUFuncOp op) -> WalkResult {
// 1-1: Create new FunctionType and replace old FunctionType
auto oftype = op.getFunctionType();
llvm::SmallVector<mlir::Type, 4> argTypes;
ArrayRef<Type> inputTypes;
ArrayRef<Type> resultTypes;
for (Type t : oftype.getInputs()) {
MemRefType m = t.dyn_cast<MemRefType>();
if (m) {
Type et = m.getElementType();
if (et.isBF16()) {
if (m.hasStaticShape()) {
llvm::ArrayRef<int64_t> s = m.getShape();
auto i = MemRefType::get(s, builder.getI16Type());
argTypes.push_back(i);
} else {
// TODO: Support dynamic shape
op.emitError(
"Non static shape bf16 MemRefType in GPUFuncOp inputs");
}
} else {
argTypes.push_back(t);
}
} else if (t.isBF16()) {
argTypes.push_back(builder.getI16Type());
(void)mod.walk<WalkOrder::PreOrder>([&](gpu::GPUFuncOp op) -> WalkResult {
// 1-1: Create new FunctionType and replace old FunctionType
auto oftype = op.getFunctionType();
llvm::SmallVector<mlir::Type, 4> argTypes;
ArrayRef<Type> inputTypes;
ArrayRef<Type> resultTypes;
for (Type t : oftype.getInputs()) {
MemRefType m = t.dyn_cast<MemRefType>();
if (m) {
Type et = m.getElementType();
if (et.isBF16()) {
if (m.hasStaticShape()) {
llvm::ArrayRef<int64_t> s = m.getShape();
auto i = MemRefType::get(s, builder.getI16Type());
argTypes.push_back(i);
} else {
argTypes.push_back(t);
// TODO: Support dynamic shape
op.emitError(
"Non static shape bf16 MemRefType in GPUFuncOp inputs");
}
} else {
argTypes.push_back(t);
}
auto nftype =
dyn_cast<FunctionType>(op.cloneTypeWith(argTypes, resultTypes));
op.setFunctionType(nftype);
} else if (t.isBF16()) {
argTypes.push_back(builder.getI16Type());
} else {
argTypes.push_back(t);
}
}
auto nftype =
dyn_cast<FunctionType>(op.cloneTypeWith(argTypes, resultTypes));
op.setFunctionType(nftype);

// 1-2: Collect ops that need bf16 widening and widen those ops
// Most ops in arith and math dialect that has bf16 operand will
// be widened to use f32 operand
SmallVector<Operation *, 8> widenOps;
WalkResult result1_1 = op.getRegion().walk<WalkOrder::PreOrder>(
[&](Operation *lop) -> WalkResult {
auto oname = lop->getName().getStringRef();
if (oname.startswith("arith.") || oname.startswith("math.")) {
// Skip bitcast operation as we cannot change width of operand
if (!oname.startswith("arith.bitcast")) {
bool needWidening = false;
for (const auto &oper : lop->getOperands()) {
if (oper.getType().isBF16()) {
needWidening = true;
}
}
if (needWidening) {
widenOps.push_back(lop);
}
// 1-2: Collect ops that need bf16 widening and widen those ops
// Most ops in arith and math dialect that has bf16 operand will
// be widened to use f32 operand
SmallVector<Operation *, 8> widenOps;
(void)op.getRegion().walk<WalkOrder::PreOrder>(
[&](Operation *lop) -> WalkResult {
auto oname = lop->getName().getStringRef();
if (oname.startswith("arith.") || oname.startswith("math.")) {
// Skip bitcast operation as we cannot change width of operand
if (!oname.startswith("arith.bitcast")) {
bool needWidening = false;
for (const auto &oper : lop->getOperands()) {
if (oper.getType().isBF16()) {
needWidening = true;
}
}
return WalkResult::advance();
});
for (Operation *o : widenOps) {
builder.setInsertionPoint(o);
unsigned int idx = 0;
for (const auto &oper : o->getOperands()) {
if (oper.getType().isBF16()) {
auto newOp = builder.create<arith::ExtFOp>(
o->getLoc(), builder.getF32Type(), oper);
o->setOperand(idx, newOp);
}
idx++;
}
for (mlir::OpResult res : o->getResults()) {
if (res.getType().isBF16()) {
res.setType(builder.getF32Type());
builder.setInsertionPointAfter(o);
auto newRes = builder.create<arith::TruncFOp>(
o->getLoc(), builder.getBF16Type(), res);
res.replaceAllUsesExcept(newRes, newRes);
if (needWidening) {
widenOps.push_back(lop);
}
}
}
return WalkResult::advance();
});
for (Operation *o : widenOps) {
builder.setInsertionPoint(o);
unsigned int idx = 0;
for (const auto &oper : o->getOperands()) {
if (oper.getType().isBF16()) {
auto newOp = builder.create<arith::ExtFOp>(
o->getLoc(), builder.getF32Type(), oper);
o->setOperand(idx, newOp);
}
// 1-3: Change element type of entry block arguments
Block &eblock = op.getBlocks().front();
for (mlir::BlockArgument arg : eblock.getArguments()) {
Type argt = arg.getType();
MemRefType mt = dyn_cast<MemRefType>(argt);
if (mt) {
if (mt.getElementType().isBF16()) {
MemRefType newMt = dyn_cast<MemRefType>(
mt.cloneWith(mt.getShape(), builder.getI16Type()));
arg.setType(newMt);
}
} else if (argt.isBF16()) {
arg.setType(builder.getI16Type());
}
idx++;
}
for (mlir::OpResult res : o->getResults()) {
if (res.getType().isBF16()) {
res.setType(builder.getF32Type());
builder.setInsertionPointAfter(o);
auto newRes = builder.create<arith::TruncFOp>(
o->getLoc(), builder.getBF16Type(), res);
res.replaceAllUsesExcept(newRes, newRes);
}
WalkResult result1_2 = op.getRegion().walk<WalkOrder::PreOrder>(
[&](Operation *lop) -> WalkResult {
if (dyn_cast<arith::ExtFOp>(lop)) {
// if extf i16 -> f32 : "i16" is not a typo
if (lop->getOperand(0).getType().isInteger(16)) {
if (lop->getResult(0).getType().isF32()) {
builder.setInsertionPoint(lop);
auto bcast = builder.create<arith::BitcastOp>(
lop->getLoc(), builder.getBF16Type(),
lop->getOperand(0));
lop->setOperand(0, bcast);
}
}
} else if (dyn_cast<arith::TruncFOp>(lop)) {
// if truncf f32 -> bf16
if (lop->getOperand(0).getType().isF32()) {
if (lop->getResult(0).getType().isBF16()) {
builder.setInsertionPointAfter(lop);
auto bcast = builder.create<arith::BitcastOp>(
lop->getLoc(), builder.getI16Type(),
lop->getResult(0));
lop->getResult(0).replaceAllUsesExcept(bcast, bcast);
}
}
} else {
if (lop->getNumResults() > 0) {
if (lop->getResultTypes().front().isBF16()) {
lop->getResult(0).setType(builder.getI16Type());
}
}
}
}
// 1-3: Change element type of entry block arguments
Block &eblock = op.getBlocks().front();
for (mlir::BlockArgument arg : eblock.getArguments()) {
Type argt = arg.getType();
MemRefType mt = dyn_cast<MemRefType>(argt);
if (mt) {
if (mt.getElementType().isBF16()) {
MemRefType newMt = dyn_cast<MemRefType>(
mt.cloneWith(mt.getShape(), builder.getI16Type()));
arg.setType(newMt);
}
} else if (argt.isBF16()) {
arg.setType(builder.getI16Type());
}
}
(void)op.getRegion().walk<WalkOrder::PreOrder>(
[&](Operation *lop) -> WalkResult {
if (dyn_cast<arith::ExtFOp>(lop)) {
// if extf i16 -> f32 : "i16" is not a typo
if (lop->getOperand(0).getType().isInteger(16)) {
if (lop->getResult(0).getType().isF32()) {
builder.setInsertionPoint(lop);
auto bcast = builder.create<arith::BitcastOp>(
lop->getLoc(), builder.getBF16Type(), lop->getOperand(0));
lop->setOperand(0, bcast);
}
}
} else if (dyn_cast<arith::TruncFOp>(lop)) {
// if truncf f32 -> bf16
if (lop->getOperand(0).getType().isF32()) {
if (lop->getResult(0).getType().isBF16()) {
builder.setInsertionPointAfter(lop);
auto bcast = builder.create<arith::BitcastOp>(
lop->getLoc(), builder.getI16Type(), lop->getResult(0));
lop->getResult(0).replaceAllUsesExcept(bcast, bcast);
}
return WalkResult::advance();
});
return WalkResult::advance();
});
}
} else {
if (lop->getNumResults() > 0) {
if (lop->getResultTypes().front().isBF16()) {
lop->getResult(0).setType(builder.getI16Type());
}
}
}
return WalkResult::advance();
});
return WalkResult::advance();
});
// Part 2: gpu::LaunchFuncOp and gpu::AllocOp
SmallVector<Operation *, 8> replacedAllocOps;
WalkResult result2 = mod.walk<WalkOrder::PreOrder>([&](gpu::LaunchFuncOp op)
-> WalkResult {
(void)mod.walk<WalkOrder::PreOrder>([&](gpu::LaunchFuncOp op)
-> WalkResult {
for (const auto &kop : op.getKernelOperands()) {
auto mem = kop;
Type memt = mem.getType();
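In BF16ToGPU.cpp the walk results (result1, result1_1, result1_2, result2) were stored in locals that were never read, which gcc 11 flags as unused variables; the rewrite drops the names and discards the return values explicitly with a (void) cast. A generic sketch of that idiom, using a plain placeholder function rather than the MLIR walk API:

#include <cstdio>

// Stand-in for an API whose status result the caller does not need.
static int visitAllOps() {
  std::puts("visiting...");
  return 0;
}

void runPass() {
  // int r = visitAllOps();   // gcc 11 would warn: unused variable 'r'
  (void)visitAllOps(); // explicit discard documents that the result is ignored
}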