From ad250879e57824deeab8b45ff083ad1cf9a3e5bd Mon Sep 17 00:00:00 2001 From: "Prajapati, Dimple" Date: Wed, 13 Dec 2023 14:05:43 -0800 Subject: [PATCH] Add gpu.printf lowering in GPUToSPIRV pass along with SPIRV patch to fix spirv dialect and serialization for spirv.CL.printf Changes include: - Adds a gpu.printf op conversion pattern to the GPUToSPIRV pass that lowers the op to the spirv.CL.printf op - Adds a lit test as well as an e2e test case to verify the lowering through the imex-convert-gpu-to-spirv pass - Adds a patch that fixes the upstream MLIR spirv dialect to support SpecConstantComposite as an initializer for the spirv.GlobalVariable op. The patch "0001-SPIRV-add-SpecConstantComposite-Op-support-in-Global.patch" is carried until the spirv dialect fix is upstreamed. --- ...nstantComposite-Op-support-in-Global.patch | 55 ++++++++ .../Conversion/GPUToSPIRV/GPUToSPIRVPass.h | 9 ++ lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp | 122 ++++++++++++++++++ test/Conversion/GPUToSPIRV/gpu-to-llvm.pp | 17 +++ test/Conversion/GPUToSPIRV/printf.mlir | 55 ++++++++ .../GPUToSPIRV/printf_with_runner.mlir | 39 ++++++ test/SPIRV/OpTest.spirv.CL.printf.mlir | 68 ++++++++++ 7 files changed, 365 insertions(+) create mode 100644 build_tools/patches/0001-SPIRV-add-SpecConstantComposite-Op-support-in-Global.patch create mode 100644 test/Conversion/GPUToSPIRV/gpu-to-llvm.pp create mode 100644 test/Conversion/GPUToSPIRV/printf.mlir create mode 100644 test/Conversion/GPUToSPIRV/printf_with_runner.mlir create mode 100644 test/SPIRV/OpTest.spirv.CL.printf.mlir diff --git a/build_tools/patches/0001-SPIRV-add-SpecConstantComposite-Op-support-in-Global.patch b/build_tools/patches/0001-SPIRV-add-SpecConstantComposite-Op-support-in-Global.patch new file mode 100644 index 000000000..a1a19c5f2 --- /dev/null +++ b/build_tools/patches/0001-SPIRV-add-SpecConstantComposite-Op-support-in-Global.patch @@ -0,0 +1,55 @@ +From fb3fb52995e3441aa8f8abc14006eda6a599c94b Mon Sep 17 00:00:00 2001 +From: "Prajapati, Dimple" +Date: Fri, 1 Dec 2023 
12:57:17 -0800 +Subject: [PATCH] SPIRV: add SpecConstantComposite Op support in GlobalVarOp + +--- + mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | 5 +++-- + mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp | 11 ++++++++--- + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +index 3906bf74ea72..b39f1607ad7f 100644 +--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp ++++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +@@ -1163,9 +1163,10 @@ LogicalResult spirv::GlobalVariableOp::verify() { + // constants and other variables is supported. They could be normal + // constants in the module scope as well. + if (!initOp || +- !isa(initOp)) { ++ !isa(initOp)) { + return emitOpError("initializer must be result of a " +- "spirv.SpecConstant or spirv.GlobalVariable op"); ++ "spirv.SpecConstant or spirv.GlobalVariable op or " ++ "spirv.SpecConstantCompositeOp"); + } + } + +diff --git a/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp b/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp +index 44538c38a41b..cc968c0627e9 100644 +--- a/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp ++++ b/mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp +@@ -382,13 +382,18 @@ Serializer::processGlobalVariableOp(spirv::GlobalVariableOp varOp) { + // Encode StorageClass. + operands.push_back(static_cast(varOp.storageClass())); + ++ // Encode initialization. ++ + // Encode initialization. 
+ if (auto initializer = varOp.getInitializer()) { + auto initializerID = getVariableID(*initializer); ++ + if (!initializerID) { +- return emitError(varOp.getLoc(), +- "invalid usage of undefined variable as initializer"); +- } ++ initializerID = getSpecConstID(*initializer); ++ if (!initializerID) ++ return emitError(varOp.getLoc(), ++ "invalid usage of undefined variable as initializer"); ++ } + operands.push_back(initializerID); + elidedAttrs.push_back("initializer"); + } +-- +2.34.1 diff --git a/include/imex/Conversion/GPUToSPIRV/GPUToSPIRVPass.h b/include/imex/Conversion/GPUToSPIRV/GPUToSPIRVPass.h index e88df8ca1..ac24ea3c9 100644 --- a/include/imex/Conversion/GPUToSPIRV/GPUToSPIRVPass.h +++ b/include/imex/Conversion/GPUToSPIRV/GPUToSPIRVPass.h @@ -14,10 +14,15 @@ #ifndef IMEX_GPUTOSPIRV_PASS_H_ #define IMEX_GPUTOSPIRV_PASS_H_ +#include +#include +#include #include namespace mlir { +class SPIRVTypeConverter; +class RewritePatternSet; class Pass; struct ScfToSPIRVContextImpl; class ModuleOp; @@ -26,6 +31,10 @@ template class OperationPass; } // namespace mlir namespace imex { + +void populateGPUPrintfToSPIRVPatterns(mlir::SPIRVTypeConverter &typeConverter, + mlir::RewritePatternSet &patterns); + /// Create a pass std::unique_ptr<::mlir::OperationPass<::mlir::ModuleOp>> createConvertGPUXToSPIRVPass(bool mapMemorySpace = true); diff --git a/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp b/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp index 17742bba5..e0b933625 100644 --- a/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp +++ b/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp @@ -29,6 +29,15 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" + +#include "mlir/Conversion/GPUCommon/GPUCommonPass.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinTypes.h" +#include "llvm/ADT/SmallVectorExtras.h" +#include "llvm/Support/FormatVariadic.h" + #include 
"mlir/Dialect/SPIRV/IR/SPIRVOps.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Matchers.h" @@ -59,6 +68,117 @@ class GPUXToSPIRVPass : public ::imex::ConvertGPUXToSPIRVBase { bool mapMemorySpace; }; +class PrintfOpPattern : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + mlir::LogicalResult + matchAndRewrite(mlir::gpu::PrintfOp gpuPrintfOp, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto loc = gpuPrintfOp.getLoc(); + + auto funcOp = rewriter.getBlock() + ->getParent() + ->getParentOfType(); + + auto moduleOp = funcOp->getParentOfType(); + + const char formatStringPrefix[] = "printfMsg"; + unsigned stringNumber = 0; + mlir::SmallString<16> globalVarName; + mlir::spirv::GlobalVariableOp globalVar; + + // formulate spirv global variable name + do { + globalVarName.clear(); + (formatStringPrefix + llvm::Twine(stringNumber++)) + .toStringRef(globalVarName); + } while (moduleOp.lookupSymbol(globalVarName)); + + auto i8Type = rewriter.getI8Type(); + auto i32Type = rewriter.getI32Type(); + + unsigned scNum = 0; + auto createSpecConstant = [&](unsigned value) { + auto attr = rewriter.getI8IntegerAttr(value); + mlir::SmallString<16> specCstName; + (llvm::Twine(globalVarName) + "_sc" + llvm::Twine(scNum++)) + .toStringRef(specCstName); + + return rewriter.create( + loc, rewriter.getStringAttr(specCstName), attr); + }; + + // define GlobalVarOp with printf format string using SpecConstants + // and make composite of SpecConstants + { + mlir::Operation *parent = + mlir::SymbolTable::getNearestSymbolTable(gpuPrintfOp->getParentOp()); + + mlir::ConversionPatternRewriter::InsertionGuard guard(rewriter); + + mlir::Block &entryBlock = *parent->getRegion(0).begin(); + rewriter.setInsertionPointToStart( + &entryBlock); // insertion point at module level + + // Create Constituents with SpecConstant to construct + // SpecConstantCompositeOp + llvm::SmallString<20> 
formatString(gpuPrintfOp.getFormat()); + formatString.push_back('\0'); // Null terminate for C + mlir::SmallVector constituents; + for (auto c : formatString) { + auto cSpecConstantOp = createSpecConstant(c); + constituents.push_back(mlir::SymbolRefAttr::get(cSpecConstantOp)); + } + + // Create specialization constant composite defined via spirv.SpecConstant + size_t contentSize = constituents.size(); + auto globalType = mlir::spirv::ArrayType::get(i8Type, contentSize); + mlir::spirv::SpecConstantCompositeOp specCstComposite; + mlir::SmallString<16> specCstCompositeName; + (llvm::Twine(globalVarName) + "_scc").toStringRef(specCstCompositeName); + specCstComposite = rewriter.create( + loc, mlir::TypeAttr::get(globalType), + rewriter.getStringAttr(specCstCompositeName), + rewriter.getArrayAttr(constituents)); + + // Define GlobalVariable initialized from Constant Composite + globalVar = rewriter.create( + loc, + mlir::spirv::PointerType::get( + globalType, mlir::spirv::StorageClass::UniformConstant), + globalVarName, mlir::FlatSymbolRefAttr::get(specCstComposite)); + } + + // Get SSA value of Global variable + mlir::Value globalPtr = + rewriter.create(loc, globalVar); + + mlir::Value fmtStr = rewriter.create( + loc, + mlir::spirv::PointerType::get( + i8Type, mlir::spirv::StorageClass::UniformConstant), + globalPtr); + + // Get printf arguments + auto argsRange = adaptor.getArgs(); + mlir::SmallVector printfArgs; + printfArgs.reserve(argsRange.size() + 1); + printfArgs.append(argsRange.begin(), argsRange.end()); + + rewriter.create(loc, i32Type, fmtStr, printfArgs); + + rewriter.eraseOp(gpuPrintfOp); + + return mlir::success(); + } +}; + +void populateGPUPrintfToSPIRVPatterns(mlir::SPIRVTypeConverter &typeConverter, + mlir::RewritePatternSet &patterns) { + + patterns.add(typeConverter, patterns.getContext()); +} + void GPUXToSPIRVPass::runOnOperation() { mlir::MLIRContext *context = &getContext(); mlir::ModuleOp module = getOperation(); @@ -242,6 +362,8 @@ void 
GPUXToSPIRVPass::runOnOperation() { mlir::populateSCFToSPIRVPatterns(typeConverter, scfToSpirvCtx, patterns); mlir::cf::populateControlFlowToSPIRVPatterns(typeConverter, patterns); mlir::populateMathToSPIRVPatterns(typeConverter, patterns); + imex::populateGPUPrintfToSPIRVPatterns(typeConverter, patterns); + if (this->enableVCIntrinsic) imex::populateXeGPUToVCIntrinsicsPatterns(typeConverter, patterns); else if (this->enableJointMatrix) diff --git a/test/Conversion/GPUToSPIRV/gpu-to-llvm.pp b/test/Conversion/GPUToSPIRV/gpu-to-llvm.pp new file mode 100644 index 000000000..3fbe5fc29 --- /dev/null +++ b/test/Conversion/GPUToSPIRV/gpu-to-llvm.pp @@ -0,0 +1,17 @@ +builtin.module( + imex-convert-gpu-to-spirv{enable-vc-intrinsic=true} + spirv.module(spirv-lower-abi-attrs + spirv-update-vce) + func.func(llvm-request-c-wrappers) + serialize-spirv + convert-gpu-to-gpux + convert-scf-to-cf + convert-cf-to-llvm + convert-arith-to-llvm + convert-func-to-llvm + convert-math-to-llvm + convert-gpux-to-llvm + expand-strided-metadata + lower-affine + finalize-memref-to-llvm + reconcile-unrealized-casts) diff --git a/test/Conversion/GPUToSPIRV/printf.mlir b/test/Conversion/GPUToSPIRV/printf.mlir new file mode 100644 index 000000000..9103dd18a --- /dev/null +++ b/test/Conversion/GPUToSPIRV/printf.mlir @@ -0,0 +1,55 @@ +// RUN: imex-opt -allow-unregistered-dialect -split-input-file -imex-convert-gpu-to-spirv='enable-vc-intrinsic=true' -verify-diagnostics %s -o - | FileCheck %s + +module @test attributes { + gpu.container_module, + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> +} { + func.func @print_test() { + %c1 = arith.constant 1 : index + %c100 = arith.constant 100: i32 + %cst_f32 = arith.constant 314.4: f32 + + gpu.launch_func @kernel_module1::@test_printf_arg + blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) + args(%c100: i32, %cst_f32: f32) + return + } + + // CHECK-LABEL: spirv.module @{{.*}} Physical64 OpenCL + // CHECK-DAG: spirv.SpecConstant 
[[SPECCST:@.*]] = {{.*}} : i8 + // CHECK-DAG: spirv.SpecConstantComposite [[SPECCSTCOMPOSITE:@.*]] ([[SPECCST]], {{.*}}) : !spirv.array<[[ARRAYSIZE:.*]] x i8> + // CHECK-DAG: spirv.GlobalVariable [[PRINTMSG:@.*]] initializer([[SPECCSTCOMPOSITE]]) : !spirv.ptr, UniformConstant> + // spirv.SpecConstantComposite + gpu.module @kernel_module0 { + gpu.func @test_printf(%arg0: i32, %arg1: f32) kernel + attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} { + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = gpu.thread_id x + // CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr, UniformConstant> + // CHECK-NEXT: [[FMTSTR_PTR:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr, UniformConstant> to !spirv.ptr + // CHECK-NEXT: {{%.*}} = spirv.CL.printf [[FMTSTR_PTR]],{{.*}} : (!spirv.ptr, ({{.*}})) -> i32 + gpu.printf "\nHello\n" + gpu.return + } + } + + // CHECK-LABEL: spirv.module @{{.*}} Physical64 OpenCL + // CHECK-DAG: spirv.SpecConstant [[SPECCST:@.*]] = {{.*}} : i8 + // CHECK-DAG: spirv.SpecConstantComposite [[SPECCSTCOMPOSITE:@.*]] ([[SPECCST]], {{.*}}) : !spirv.array<[[ARRAYSIZE:.*]] x i8> + // CHECK-DAG: spirv.GlobalVariable [[PRINTMSG:@.*]] initializer([[SPECCSTCOMPOSITE]]) : !spirv.ptr, UniformConstant> + // spirv.SpecConstantComposite + gpu.module @kernel_module1 { + gpu.func @test_printf_arg(%arg0: i32, %arg1: f32) kernel + attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} { + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = gpu.thread_id x + // CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr, UniformConstant> + // CHECK-NEXT: [[FMTSTR_PTR:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr, UniformConstant> to !spirv.ptr + // CHECK-NEXT: {{%.*}} = spirv.CL.printf [[FMTSTR_PTR]], {{%.*}}, {{%.*}}, {{%.*}} : (!spirv.ptr, (i32, f32, i64)) -> i32 + gpu.printf "\nHello, world : %d %f \n Thread id: %d\n" %arg0, %arg1, %2: i32, f32, index + gpu.return + } + } +} diff --git 
a/test/Conversion/GPUToSPIRV/printf_with_runner.mlir b/test/Conversion/GPUToSPIRV/printf_with_runner.mlir new file mode 100644 index 000000000..8c5993fa4 --- /dev/null +++ b/test/Conversion/GPUToSPIRV/printf_with_runner.mlir @@ -0,0 +1,39 @@ +// RUN: %python_executable %imex_runner --requires=sycl-runtime -i %s --pass-pipeline-file=%p/gpu-to-llvm.pp \ +// RUN: --runner imex-cpu-runner -e main \ +// RUN: --entry-point-result=void \ +// RUN: --shared-libs=%irunner_utils,%mlir_runner_utils,%mlir_c_runner_utils,%sycl_runtime --filecheck +module attributes { + gpu.container_module +}{ + + func.func @main() { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c100 = arith.constant 100: i32 + %cst_f32 = arith.constant 314.4: f32 + + gpu.launch_func @kernel_module::@print_kernel + blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) + args(%c100: i32, %cst_f32: f32) + // CHECK: Hello + // CHECK: Hello, world : 100 314.399994 + // CHECK: Thread id: 0 + return + } + + gpu.module @kernel_module + attributes { + spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> +} { + gpu.func @print_kernel(%arg0: i32, %arg1: f32) kernel + attributes {spirv.entry_point_abi = #spirv.entry_point_abi<>} { + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = gpu.thread_id x + gpu.printf "\nHello\n" + gpu.printf "\nHello, world : %d %f\n" %arg0, %arg1: i32, f32 + gpu.printf "\nThread id: %d\n" %2: index + gpu.return + } + } +} diff --git a/test/SPIRV/OpTest.spirv.CL.printf.mlir b/test/SPIRV/OpTest.spirv.CL.printf.mlir new file mode 100644 index 000000000..5861e9056 --- /dev/null +++ b/test/SPIRV/OpTest.spirv.CL.printf.mlir @@ -0,0 +1,68 @@ +// RUN: %python_executable %imex_runner --requires=sycl-runtime -i %s --pass-pipeline-file=%p/spirv-to-llvm.pp \ +// RUN: --runner imex-cpu-runner -e main \ +// RUN: --entry-point-result=void \ +// RUN: --shared-libs=%irunner_utils,%mlir_runner_utils,%mlir_c_runner_utils,%sycl_runtime --filecheck + + +module @print_simple 
attributes {gpu.container_module} { + + func.func @test() -> () attributes {llvm.emit_c_interface} { + %c1 = arith.constant 1 : index + %c100_i32 = arith.constant 100 : i32 + %cst_f32 = arith.constant 3.144000e+02 : f32 + + gpu.launch_func @test_kernel::@test_kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) args(%c100_i32 : i32, %cst_f32 : f32) + return + } + spirv.module @__spv__test_kernel Physical64 OpenCL requires #spirv.vce attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { + + spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr, Input> + spirv.SpecConstant @printfMsg0_sc0 = 72 : i8 + spirv.SpecConstant @printfMsg0_sc1 = 101 : i8 + spirv.SpecConstant @printfMsg0_sc2 = 108 : i8 + spirv.SpecConstant @printfMsg0_sc3 = 111 : i8 + spirv.SpecConstant @printfMsg0_sc4 = 58 : i8 + spirv.SpecConstant @printfMsg0_sc5 = 32 : i8 + spirv.SpecConstant @printfMsg0_sc6 = 37 : i8 + spirv.SpecConstant @printfMsg0_sc7 = 100 : i8 + spirv.SpecConstant @printfMsg0_sc8 = 102 : i8 + spirv.SpecConstant @printfMsg0_sc9 = 10 : i8 + spirv.SpecConstant @printfMsg0_sc10 = 0 : i8 + + // Print Fmt String - "Hello\n" + spirv.SpecConstantComposite @printfMsg0_scc (@printfMsg0_sc0, @printfMsg0_sc1, @printfMsg0_sc2, @printfMsg0_sc2, @printfMsg0_sc3, @printfMsg0_sc9, @printfMsg0_sc10) : !spirv.array<7 x i8> + spirv.GlobalVariable @printfMsg0 initializer(@printfMsg0_scc) : !spirv.ptr, UniformConstant> + + // Print Fmt String - "Hello: %d %f\n" + spirv.SpecConstantComposite @printfMsg1_scc (@printfMsg0_sc0, @printfMsg0_sc1, @printfMsg0_sc2, @printfMsg0_sc2, @printfMsg0_sc3, @printfMsg0_sc4, @printfMsg0_sc5, @printfMsg0_sc6, @printfMsg0_sc7, @printfMsg0_sc5, @printfMsg0_sc6, @printfMsg0_sc8, @printfMsg0_sc9, @printfMsg0_sc10) : !spirv.array<14 x i8> + spirv.GlobalVariable @printfMsg1 initializer(@printfMsg1_scc) : !spirv.ptr, UniformConstant> + + spirv.func @test_kernel(%arg0: i32, %arg1: f32) "None" 
attributes {gpu.known_block_size = array, gpu.known_grid_size = array, workgroup_attributions = 0 : i64} { + + %printfMsg0_addr = spirv.mlir.addressof @printfMsg0 : !spirv.ptr, UniformConstant> + %2 = spirv.Bitcast %printfMsg0_addr : !spirv.ptr, UniformConstant> to !spirv.ptr + %3 = spirv.CL.printf %2, %arg0 : (!spirv.ptr, (i32)) -> i32 + + %printfMsg1_addr = spirv.mlir.addressof @printfMsg1 : !spirv.ptr, UniformConstant> + %0 = spirv.Bitcast %printfMsg1_addr : !spirv.ptr, UniformConstant> to !spirv.ptr + %1 = spirv.CL.printf %0, %arg0, %arg1 : (!spirv.ptr, (i32, f32)) -> i32 + + spirv.Return + } + spirv.EntryPoint "Kernel" @test_kernel, @__builtin_var_WorkgroupId__, @printfMsg0 + } + + gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { + gpu.func @test_kernel(%arg0: i32, %arg1: f32) kernel attributes {gpu.known_block_size = array, gpu.known_grid_size = array, spirv.entry_point_abi = #spirv.entry_point_abi<>} { + + gpu.return + } + } + func.func @main() attributes {llvm.emit_c_interface} { + func.call @test() : ()-> () + // CHECK: Hello + // CHECK: Hello: 100 314.399994 + + return + } +}