Skip to content

Commit

Permalink
add "infer" option for backwards compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
umangyadav committed Sep 30, 2024
1 parent e51517e commit b521f41
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void configureGpuToROCDLConversionLegality(ConversionTarget &target);
/// is configurable.
std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
createLowerGpuOpsToROCDLOpsPass(
// `chipset`: name of the chipset the lowered operations will run on. The
// default "infer" makes the pass take the chip from the ROCDL target
// attribute attached to the GPU module (exactly one target is required);
// any other value is parsed directly as a chipset name.
const std::string &chipset = "infer",
// `indexBitwidth`: bitwidth of the index type. Per the pass option
// description, 0 means "use size of machine word".
unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
bool useBarePtrCallConv = false,
gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown);
Expand Down
3 changes: 3 additions & 0 deletions external/llvm-project/mlir/include/mlir/Conversion/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,9 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
"memref::MemRefDialect",
];
let options = [
Option<"chipset", "chipset", "std::string",
/*default=*/"\"infer\"",
"Chipset that these operations will run on. By default it will infer target from attached Target Attribute on GPU Module">,
Option<"indexBitwidth", "index-bitwidth", "unsigned",
/*default=kDeriveIndexBitwidthFromDataLayout*/"0",
"Bitwidth of the index type, 0 to use size of machine word">,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
#include "mlir/Dialect/Arith/Transforms/Passes.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Location.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"
Expand Down Expand Up @@ -57,7 +55,6 @@ namespace mlir {
} // namespace mlir

#include "mlir/Dialect/LLVMIR/Transforms/Passes.h"

using namespace mlir;

/// Returns true if the given `gpu.func` can be safely called using the bare
Expand Down Expand Up @@ -205,8 +202,11 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
struct LowerGpuOpsToROCDLOpsPass
: public impl::ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
LowerGpuOpsToROCDLOpsPass() = default;
LowerGpuOpsToROCDLOpsPass(unsigned indexBitwidth, bool useBarePtrCallConv,
LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth,
bool useBarePtrCallConv,
gpu::amd::Runtime runtime) {
if (this->chipset.getNumOccurrences() == 0)
this->chipset = chipset;
if (this->indexBitwidth.getNumOccurrences() == 0)
this->indexBitwidth = indexBitwidth;
if (this->useBarePtrCallConv.getNumOccurrences() == 0)
Expand All @@ -220,15 +220,17 @@ struct LowerGpuOpsToROCDLOpsPass
MLIRContext *ctx = m.getContext();
ArrayAttr targets = m.getTargetsAttr();
FailureOr<amdgpu::Chipset> maybeChipset;
if (!targets) {
emitError(UnknownLoc::get(ctx), "ROCDLTargetAttr is empty on GPU module");
return signalPassFailure();
}
if (targets.size() != 1) {
emitError(UnknownLoc::get(ctx), "ROCDLTargetAttrs has more specified "
"more than one gpu-arch on GPU module");
return signalPassFailure();
} else {
if (chipset == "infer") {
if (!targets) {
emitError(UnknownLoc::get(ctx),
"ROCDLTargetAttr is empty on GPU module");
return signalPassFailure();
}
if (targets.size() != 1) {
emitError(UnknownLoc::get(ctx), "ROCDLTargetAttrs has more specified "
"more than one gpu-arch on GPU module");
return signalPassFailure();
}
const ROCDL::ROCDLTargetAttr targetAttr =
mlir::dyn_cast<ROCDL::ROCDLTargetAttr>(targets.getValue().front());
maybeChipset = amdgpu::Chipset::parse(targetAttr.getChip());
Expand All @@ -237,6 +239,12 @@ struct LowerGpuOpsToROCDLOpsPass
"Invalid chipset name: " + targetAttr.getChip());
return signalPassFailure();
}
} else {
maybeChipset = amdgpu::Chipset::parse(chipset);
if (failed(maybeChipset)) {
emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
return signalPassFailure();
}
}

auto llvmDataLayout = m->getAttrOfType<StringAttr>(
Expand Down Expand Up @@ -412,9 +420,10 @@ void mlir::populateGpuToROCDLConversionPatterns(
}

std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createLowerGpuOpsToROCDLOpsPass(unsigned indexBitwidth,
mlir::createLowerGpuOpsToROCDLOpsPass(const std::string &chipset,
unsigned indexBitwidth,
bool useBarePtrCallConv,
gpu::amd::Runtime runtime) {
return std::make_unique<LowerGpuOpsToROCDLOpsPass>(
indexBitwidth, useBarePtrCallConv, runtime);
chipset, indexBitwidth, useBarePtrCallConv, runtime);
}
2 changes: 1 addition & 1 deletion mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ void rock::buildBackendPipeline(OpPassManager &pm,
pm.addPass(createGpuROCDLAttachTarget(opts));
auto &gpuPm2 = pm.nest<gpu::GPUModuleOp>();
gpuPm2.addPass(createLowerGpuOpsToROCDLOpsPass(
/*indexBitwidth=*/kDeriveIndexBitwidthFromDataLayout,
/*chipset=*/"infer", /*indexBitwidth=*/kDeriveIndexBitwidthFromDataLayout,
/*useBarePtrCallConv=*/true, gpu::amd::Runtime::HIP));
// Ensure we only run passes on LLVM functions inside GPU modules.
auto &llvmFuncPm = gpuPm2.nest<LLVM::LLVMFuncOp>();
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/rocmlir-driver/pipelines.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
// BINARY-NEXT:expand-strided-metadata,
// BINARY-NEXT:lower-affine),
// BINARY-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx90a correct-sqrt=true daz=false fast=false features= finite-only=false module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true},
// BINARY-NEXT:gpu.module(convert-gpu-to-rocdl{index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
// BINARY-NEXT:gpu.module(convert-gpu-to-rocdl{chipset=infer index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
// BINARY-NEXT:llvm.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},
// BINARY-NEXT:cse,
// BINARY-NEXT:rock-prepare-llvm)),
Expand All @@ -71,7 +71,7 @@
// BINARY_MI300-NEXT:expand-strided-metadata,
// BINARY_MI300-NEXT:lower-affine),
// BINARY_MI300-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx940 correct-sqrt=true daz=false fast=false features= finite-only=false module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true},
// BINARY_MI300-NEXT:gpu.module(convert-gpu-to-rocdl{index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
// BINARY_MI300-NEXT:gpu.module(convert-gpu-to-rocdl{chipset=infer index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
// BINARY_MI300-NEXT:llvm.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},
// BINARY_MI300-NEXT:cse,
// BINARY_MI300-NEXT:rock-prepare-llvm)),
Expand Down

0 comments on commit b521f41

Please sign in to comment.