diff --git a/clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td index 576d619fcf7a..a6932c8ca178 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td @@ -185,4 +185,25 @@ def OpenCLKernelAttr : CIRUnitAttr< let storageType = [{ OpenCLKernelAttr }]; } +//===----------------------------------------------------------------------===// +// OpenCLKernelUniformWorkGroupSizeAttr +//===----------------------------------------------------------------------===// + +def OpenCLKernelUniformWorkGroupSizeAttr : CIRUnitAttr< + "OpenCLKernelUniformWorkGroupSize", "cl.uniform_work_group_size"> { + let summary = "OpenCL kernel work-group uniformity"; + let description = [{ + In OpenCL v2.0, work groups can either be uniform or non-uniform. + This attribute is associated with kernels to represent the work group type. + Non-kernel entities should not interact with this attribute. + + Clang's `-cl-uniform-work-group-size` compilation option provides a hint to + the compiler, indicating that the global work size should be a multiple of + the work-group size specified in the `clEnqueueNDRangeKernel` function, + thereby ensuring that the work groups are uniform. + }]; + + let storageType = [{ OpenCLKernelUniformWorkGroupSizeAttr }]; +} + #endif // MLIR_CIR_DIALECT_CIR_OPENCL_ATTRS diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 2a1b1a69da3d..c5b7e2a2edae 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -433,7 +433,22 @@ void CIRGenModule::constructAttributeList(StringRef Name, auto cirKernelAttr = mlir::cir::OpenCLKernelAttr::get(builder.getContext()); funcAttrs.set(cirKernelAttr.getMnemonic(), cirKernelAttr); - assert(!MissingFeatures::openCL()); + + auto uniformAttr = mlir::cir::OpenCLKernelUniformWorkGroupSizeAttr::get( + builder.getContext()); + if (getLangOpts().OpenCLVersion <= 120) { + // OpenCL v1.2 Work groups are always uniform + funcAttrs.set(uniformAttr.getMnemonic(), uniformAttr); + } else { + // OpenCL v2.0 Work groups may be whether uniform or not. + // '-cl-uniform-work-group-size' compile option gets a hint + // to the compiler that the global work-size be a multiple of + // the work-group size specified to clEnqueueNDRangeKernel + // (i.e. work groups are uniform). + if (getLangOpts().OffloadUniformBlock) { + funcAttrs.set(uniformAttr.getMnemonic(), uniformAttr); + } + } } if (TargetDecl->hasAttr() && diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp index 4e8e2e9558cc..7b520ab2d72e 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp @@ -91,6 +91,7 @@ class CIRDialectLLVMIRTranslationInterface mlir::NamedAttribute attribute, mlir::LLVM::ModuleTranslation &moduleTranslation) const { llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName()); + llvm::LLVMContext &llvmCtx = moduleTranslation.getLLVMContext(); if (auto extraAttr = mlir::dyn_cast( attribute.getValue())) { for (auto attr : extraAttr.getElements()) { @@ -110,6 +111,15 @@ class CIRDialectLLVMIRTranslationInterface llvmFunc->addFnAttr(llvm::Attribute::NoUnwind); } else if (mlir::dyn_cast(attr.getValue())) { llvmFunc->addFnAttr(llvm::Attribute::Convergent); + } else if (mlir::dyn_cast( + attr.getValue())) { + const auto uniformAttrName = + mlir::cir::OpenCLKernelUniformWorkGroupSizeAttr::getMnemonic(); + const bool isUniform = + extraAttr.getElements().getNamed(uniformAttrName).has_value(); + auto attrs = llvmFunc->getAttributes().addFnAttribute( + llvmCtx, "uniform-work-group-size", isUniform ? "true" : "false"); + llvmFunc->setAttributes(attrs); } else if (auto clKernelMetadata = mlir::dyn_cast( attr.getValue())) { diff --git a/clang/test/CIR/CodeGen/OpenCL/cl-uniform-wg-size.cl b/clang/test/CIR/CodeGen/OpenCL/cl-uniform-wg-size.cl new file mode 100644 index 000000000000..e6d6ce1ca25a --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/cl-uniform-wg-size.cl @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL1.2 -o %t.cl12.cir %s +// RUN: FileCheck %s -input-file=%t.cl12.cir -check-prefixes CIR,CIR-UNIFORM +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -o %t.cl20.cir %s +// RUN: FileCheck %s -input-file=%t.cl20.cir -check-prefixes CIR,CIR-NONUNIFORM +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o %t.cl20.uniform1.cir %s +// RUN: FileCheck %s -input-file=%t.cl20.uniform1.cir -check-prefixes CIR,CIR-UNIFORM +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -foffload-uniform-block -o %t.cl20.uniform2.cir %s +// RUN: FileCheck %s -input-file=%t.cl20.uniform2.cir -check-prefixes CIR,CIR-UNIFORM + +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL1.2 -o %t.cl12.ll %s +// RUN: FileCheck %s -input-file=%t.cl12.ll -check-prefixes LLVM,LLVM-UNIFORM +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -o %t.cl20.ll %s +// RUN: FileCheck %s -input-file=%t.cl20.ll -check-prefixes LLVM,LLVM-NONUNIFORM +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o %t.cl20.uniform1.ll %s +// RUN: FileCheck %s -input-file=%t.cl20.uniform1.ll -check-prefixes LLVM,LLVM-UNIFORM +// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o %t.cl20.uniform2.ll %s +// RUN: FileCheck %s -input-file=%t.cl20.uniform2.ll -check-prefixes LLVM,LLVM-UNIFORM + +// CIR-LABEL: #fn_attr = +// CIR: cl.kernel = #cir.cl.kernel +// CIR-UNIFORM: cl.uniform_work_group_size = #cir.cl.uniform_work_group_size +// CIR-NONUNIFORM-NOT: cl.uniform_work_group_size = #cir.cl.uniform_work_group_size + +// CIR-LABEL: #fn_attr1 = +// CIR-NOT: cl.kernel = #cir.cl.kernel +// CIR-NOT: cl.uniform_work_group_size + +kernel void ker() {}; +// CIR: cir.func @ker{{.*}} extra(#fn_attr) { +// LLVM: define{{.*}}@ker() #0 + +void foo() {}; +// CIR: cir.func @foo{{.*}} extra(#fn_attr1) { +// LLVM: define{{.*}}@foo() #1 + +// LLVM-LABEL: attributes #0 +// LLVM-UNIFORM: "uniform-work-group-size"="true" +// LLVM-NONUNIFORM: "uniform-work-group-size"="false" + +// LLVM-LABEL: attributes #1 +// LLVM-NOT: uniform-work-group-size