Add "infer" chipset option to convert-gpu-to-rocdl for backwards compatibility

ROCm · Sep 30, 2024 · b521f41 · b521f41
1 parent e51517e
commit b521f41
Show file tree

Hide file tree

Showing 5 changed files with 31 additions and 18 deletions.
diff --git a/external/llvm-project/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/external/llvm-project/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
@@ -42,6 +42,7 @@ void configureGpuToROCDLConversionLegality(ConversionTarget &target);
 /// is configurable.
 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
 createLowerGpuOpsToROCDLOpsPass(
+    const std::string &chipset = "infer",
     unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
     bool useBarePtrCallConv = false,
     gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown);

diff --git a/external/llvm-project/mlir/include/mlir/Conversion/Passes.td b/external/llvm-project/mlir/include/mlir/Conversion/Passes.td
@@ -591,6 +591,9 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
     "memref::MemRefDialect",
   ];
   let options = [
+    Option<"chipset", "chipset", "std::string",
+           /*default=*/"\"infer\"",
+           "Chipset that these operations will run on. By default it will infer target from attached Target Attribute on GPU Module">,
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
            "Bitwidth of the index type, 0 to use size of machine word">,

diff --git a/external/llvm-project/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/external/llvm-project/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -14,8 +14,6 @@
 #include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
 #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
 #include "mlir/Dialect/Arith/Transforms/Passes.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/Location.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/Passes.h"
@@ -57,7 +55,6 @@ namespace mlir {
 } // namespace mlir
 
 #include "mlir/Dialect/LLVMIR/Transforms/Passes.h"
-
 using namespace mlir;
 
 /// Returns true if the given `gpu.func` can be safely called using the bare
@@ -205,8 +202,11 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
 struct LowerGpuOpsToROCDLOpsPass
     : public impl::ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
   LowerGpuOpsToROCDLOpsPass() = default;
-  LowerGpuOpsToROCDLOpsPass(unsigned indexBitwidth, bool useBarePtrCallConv,
+  LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth,
+                            bool useBarePtrCallConv,
                             gpu::amd::Runtime runtime) {
+    if (this->chipset.getNumOccurrences() == 0)
+      this->chipset = chipset;
     if (this->indexBitwidth.getNumOccurrences() == 0)
       this->indexBitwidth = indexBitwidth;
     if (this->useBarePtrCallConv.getNumOccurrences() == 0)
@@ -220,15 +220,17 @@ struct LowerGpuOpsToROCDLOpsPass
     MLIRContext *ctx = m.getContext();
     ArrayAttr targets = m.getTargetsAttr();
     FailureOr<amdgpu::Chipset> maybeChipset;
-    if (!targets) {
-      emitError(UnknownLoc::get(ctx), "ROCDLTargetAttr is empty on GPU module");
-      return signalPassFailure();
-    }
-    if (targets.size() != 1) {
-      emitError(UnknownLoc::get(ctx), "ROCDLTargetAttrs has more specified "
-                                      "more than one gpu-arch on GPU module");
-      return signalPassFailure();
-    } else {
+    if (chipset == "infer") {
+      if (!targets) {
+        emitError(UnknownLoc::get(ctx),
+                  "ROCDLTargetAttr is empty on GPU module");
+        return signalPassFailure();
+      }
+      if (targets.size() != 1) {
+        emitError(UnknownLoc::get(ctx), "ROCDLTargetAttrs has more specified "
+                                        "more than one gpu-arch on GPU module");
+        return signalPassFailure();
+      }
       const ROCDL::ROCDLTargetAttr targetAttr =
           mlir::dyn_cast<ROCDL::ROCDLTargetAttr>(targets.getValue().front());
       maybeChipset = amdgpu::Chipset::parse(targetAttr.getChip());
@@ -237,6 +239,12 @@ struct LowerGpuOpsToROCDLOpsPass
                   "Invalid chipset name: " + targetAttr.getChip());
         return signalPassFailure();
       }
+    } else {
+      maybeChipset = amdgpu::Chipset::parse(chipset);
+      if (failed(maybeChipset)) {
+        emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset);
+        return signalPassFailure();
+      }
     }
 
     auto llvmDataLayout = m->getAttrOfType<StringAttr>(
@@ -412,9 +420,10 @@ void mlir::populateGpuToROCDLConversionPatterns(
 }
 
 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
-mlir::createLowerGpuOpsToROCDLOpsPass(unsigned indexBitwidth,
+mlir::createLowerGpuOpsToROCDLOpsPass(const std::string &chipset,
+                                      unsigned indexBitwidth,
                                       bool useBarePtrCallConv,
                                       gpu::amd::Runtime runtime) {
   return std::make_unique<LowerGpuOpsToROCDLOpsPass>(
-      indexBitwidth, useBarePtrCallConv, runtime);
+      chipset, indexBitwidth, useBarePtrCallConv, runtime);
 }
diff --git a/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp b/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp
@@ -244,7 +244,7 @@ void rock::buildBackendPipeline(OpPassManager &pm,
   pm.addPass(createGpuROCDLAttachTarget(opts));
   auto &gpuPm2 = pm.nest<gpu::GPUModuleOp>();
   gpuPm2.addPass(createLowerGpuOpsToROCDLOpsPass(
-      /*indexBitwidth=*/kDeriveIndexBitwidthFromDataLayout,
+      /*chipset=*/"infer", /*indexBitwidth=*/kDeriveIndexBitwidthFromDataLayout,
       /*useBarePtrCallConv=*/true, gpu::amd::Runtime::HIP));
   // Ensure we only run passes on LLVM functions inside GPU modules.
   auto &llvmFuncPm = gpuPm2.nest<LLVM::LLVMFuncOp>();

diff --git a/mlir/test/rocmlir-driver/pipelines.mlir b/mlir/test/rocmlir-driver/pipelines.mlir
@@ -52,7 +52,7 @@
 // BINARY-NEXT:expand-strided-metadata,
 // BINARY-NEXT:lower-affine),
 // BINARY-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx90a correct-sqrt=true daz=false fast=false features= finite-only=false  module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true},
-// BINARY-NEXT:gpu.module(convert-gpu-to-rocdl{index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
+// BINARY-NEXT:gpu.module(convert-gpu-to-rocdl{chipset=infer index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
 // BINARY-NEXT:llvm.func(canonicalize{  max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},
 // BINARY-NEXT:cse,
 // BINARY-NEXT:rock-prepare-llvm)),
@@ -71,7 +71,7 @@
 // BINARY_MI300-NEXT:expand-strided-metadata,
 // BINARY_MI300-NEXT:lower-affine),
 // BINARY_MI300-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx940 correct-sqrt=true daz=false fast=false features= finite-only=false  module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true},
-// BINARY_MI300-NEXT:gpu.module(convert-gpu-to-rocdl{index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
+// BINARY_MI300-NEXT:gpu.module(convert-gpu-to-rocdl{chipset=infer index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true},
 // BINARY_MI300-NEXT:llvm.func(canonicalize{  max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},
 // BINARY_MI300-NEXT:cse,
 // BINARY_MI300-NEXT:rock-prepare-llvm)),