diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 63c11e237d6c8..84689ee95a9c9 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3279,11 +3279,11 @@ OPT_LIST(V) /// Return a new OMPTraitInfo object owned by this context. OMPTraitInfo &getNewOMPTraitInfo(); - /// Whether a C++ static variable may be externalized. - bool mayExternalizeStaticVar(const Decl *D) const; + /// Whether a C++ static variable or CUDA/HIP kernel may be externalized. + bool mayExternalize(const Decl *D) const; - /// Whether a C++ static variable should be externalized. - bool shouldExternalizeStaticVar(const Decl *D) const; + /// Whether a C++ static variable or CUDA/HIP kernel should be externalized. + bool shouldExternalize(const Decl *D) const; StringRef getCUIDHash() const; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 5fa2d46de89b2..b1bcd2e07c190 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11331,7 +11331,7 @@ static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context, // name between the host and device compilation which is the same for the // same compilation unit whereas different among different compilation // units. - if (Context.shouldExternalizeStaticVar(D)) + if (Context.shouldExternalize(D)) return GVA_StrongExternal; } return L; @@ -12258,7 +12258,7 @@ operator<<(const StreamingDiagnostic &DB, return DB << "a prior #pragma section"; } -bool ASTContext::mayExternalizeStaticVar(const Decl *D) const { +bool ASTContext::mayExternalize(const Decl *D) const { bool IsStaticVar = isa(D) && cast(D)->getStorageClass() == SC_Static; bool IsExplicitDeviceVar = (D->hasAttr() && @@ -12266,14 +12266,18 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const { (D->hasAttr() && !D->getAttr()->isImplicit()); // CUDA/HIP: static managed variables need to be externalized since it is - // a declaration in IR, therefore cannot have internal linkage. - return IsStaticVar && - (D->hasAttr() || IsExplicitDeviceVar); -} - -bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { - return mayExternalizeStaticVar(D) && - (D->hasAttr() || + // a declaration in IR, therefore cannot have internal linkage. Kernels in + // anonymous name space needs to be externalized to avoid duplicate symbols. + return (IsStaticVar && + (D->hasAttr() || IsExplicitDeviceVar)) || + (D->hasAttr() && + basicGVALinkageForFunction(*this, cast(D)) == + GVA_Internal); +} + +bool ASTContext::shouldExternalize(const Decl *D) const { + return mayExternalize(D) && + (D->hasAttr() || D->hasAttr() || CUDADeviceVarODRUsedByHost.count(cast(D))); } diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index c4e3f7f54f4f2..89ceea7408ca2 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -281,13 +281,13 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { DeviceSideName = std::string(ND->getIdentifier()->getName()); // Make unique name for device side static file-scope variable for HIP. - if (CGM.getContext().shouldExternalizeStaticVar(ND) && + if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && !CGM.getLangOpts().CUID.empty()) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << DeviceSideName; - CGM.printPostfixForExternalizedStaticVar(Out); + CGM.printPostfixForExternalizedDecl(Out, ND); DeviceSideName = std::string(Out.str()); } return DeviceSideName; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 29806b65e984e..776a5f2ef7291 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1364,10 +1364,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } // Make unique name for device side static file-scope variable for HIP. - if (CGM.getContext().shouldExternalizeStaticVar(ND) && + if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty()) - CGM.printPostfixForExternalizedStaticVar(Out); + CGM.printPostfixForExternalizedDecl(Out, ND); return std::string(Out.str()); } @@ -1434,8 +1434,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { // static device variable depends on whether the variable is referenced by // a host or device host function. Therefore the mangled name cannot be // cached. - if (!LangOpts.CUDAIsDevice || - !getContext().mayExternalizeStaticVar(GD.getDecl())) { + if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) { auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; @@ -1455,7 +1454,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { // directly between host- and device-compilations, the host- and // device-mangling in host compilation could help catching certain ones. assert(!isa(ND) || !ND->hasAttr() || - getLangOpts().CUDAIsDevice || + getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice || (getContext().getAuxTargetInfo() && (getContext().getAuxTargetInfo()->getCXXABI() != getContext().getTargetInfo().getCXXABI())) || @@ -6645,7 +6644,14 @@ bool CodeGenModule::stopAutoInit() { return false; } -void CodeGenModule::printPostfixForExternalizedStaticVar( - llvm::raw_ostream &OS) const { - OS << "__static__" << getContext().getCUIDHash(); +void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, + const Decl *D) const { + StringRef Tag; + // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers + // postfix beginning with '.' since the symbol name can be demangled. + if (LangOpts.HIP) + Tag = (isa(D) ? ".static." : ".intern."); + else + Tag = (isa(D) ? "__static__" : "__intern__"); + OS << Tag << getContext().getCUIDHash(); } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 1fcd5d4d808a1..a8a63c8da57fc 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1447,9 +1447,10 @@ class CodeGenModule : public CodeGenTypeCache { TBAAAccessInfo *TBAAInfo = nullptr); bool stopAutoInit(); - /// Print the postfix for externalized static variable for single source - /// offloading languages CUDA and HIP. - void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const; + /// Print the postfix for externalized static variable or kernels for single + /// source offloading languages CUDA and HIP. + void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, + const Decl *D) const; private: llvm::Constant *GetOrCreateLLVMFunction( diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu index d830802c82061..2c3f6023acae8 100644 --- a/clang/test/CodeGenCUDA/device-var-linkage.cu +++ b/clang/test/CodeGenCUDA/device-var-linkage.cu @@ -1,15 +1,18 @@ -// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,NORDC %s -// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,RDC %s -// RUN: %clang_cc1 -triple nvptx \ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s -// RUN: %clang_cc1 -triple nvptx \ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,RDC-H %s +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CUDA %s #include "Inputs/cuda.h" @@ -24,7 +27,9 @@ __constant__ int v2; // DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null // NORDC-H-DAG: @v3 = internal externally_initialized global i32* null // RDC-H-DAG: @v3 = externally_initialized global i32* null +#if __HIP__ __managed__ int v3; +#endif // DEV-DAG: @ev1 = external addrspace(1) global i32 // HOST-DAG: @ev1 = external global i32 @@ -34,25 +39,35 @@ extern __device__ int ev1; extern __constant__ int ev2; // DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)* // HOST-DAG: @ev3 = external externally_initialized global i32* +#if __HIP__ extern __managed__ int ev3; +#endif // NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv1 = internal global i32 undef +// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 static __device__ int sv1; // NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv2 = internal global i32 undef +// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 static __constant__ int sv2; // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null +#if __HIP__ static __managed__ int sv3; +#endif __device__ __host__ int work(int *x); __device__ __host__ int fun1() { - return work(&ev1) + work(&ev2) + work(&ev3) + work(&sv1) + work(&sv2) + work(&sv3); + return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2) +#if __HIP__ + + work(&ev3) + work(&sv3) +#endif + ; } // HOST: hipRegisterVar({{.*}}@v1 diff --git a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu new file mode 100644 index 0000000000000..bc753d76e5c11 --- /dev/null +++ b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s + +// RUN: echo "GPU binary" > %t.fatbin + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \ +// RUN: -emit-llvm -o - %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s + +#include "Inputs/cuda.h" + +// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( + +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]]( + +// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00" +// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00" +// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00" + +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]] + + +template +__global__ void tempKern(T x) {} + +namespace { + __global__ void kernel() {} + struct X {}; + X x; + auto lambda = [](){}; +} + +void test() { + kernel<<<1, 1>>>(); + + tempKern<<<1, 1>>>(x); + + tempKern<<<1, 1>>>(lambda); +} diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu index 96657f0f7a131..3b8540377ab22 100644 --- a/clang/test/CodeGenCUDA/managed-var.cu +++ b/clang/test/CodeGenCUDA/managed-var.cu @@ -1,5 +1,3 @@ -// REQUIRES: x86-registered-target, amdgpu-registered-target - // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ // RUN: -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,DEV,NORDC-D %s @@ -52,15 +50,15 @@ extern __managed__ int ex; // NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4 // NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 -// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 +// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL2sx.managed = internal global i32 1 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null // NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00" -// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00" +// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00" -// POSTFIX: @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null -// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00" +// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00" static __managed__ int sx = 1; // DEV-DAG: @llvm.compiler.used diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu index bb750bd91a928..56ec2faf1e1ad 100644 --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -40,6 +40,11 @@ // RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host // RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s +// Check postfix for CUDA. + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \ +// RUN: -check-prefixes=CUDA %s #include "Inputs/cuda.h" @@ -55,11 +60,12 @@ // INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00" +// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00" +// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00" +// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00" static __device__ int x; @@ -73,8 +79,8 @@ static __device__ int x2; // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00" +// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00" static __constant__ int y;