From c9499390d569aa10408a5b9ee3dbc9cd9452981d Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 7 Apr 2022 22:57:56 -0400 Subject: [PATCH 1/3] [CUDA][HIP] Externalize kernels in anonymous name space kernels in anonymous name space needs to have unique name to avoid duplicate symbols. Fixes: https://github.com/llvm/llvm-project/issues/54560 Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D123353 --- clang/include/clang/AST/ASTContext.h | 8 +++---- clang/lib/AST/ASTContext.cpp | 18 +++++++++------- clang/lib/CodeGen/CGCUDANV.cpp | 4 ++-- clang/lib/CodeGen/CodeGenModule.cpp | 16 +++++++------- clang/lib/CodeGen/CodeGenModule.h | 7 +++--- clang/test/CodeGenCUDA/kernel-in-anon-ns.cu | 24 +++++++++++++++++++++ 6 files changed, 52 insertions(+), 25 deletions(-) create mode 100644 clang/test/CodeGenCUDA/kernel-in-anon-ns.cu diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 63c11e237d6c8..84689ee95a9c9 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3279,11 +3279,11 @@ OPT_LIST(V) /// Return a new OMPTraitInfo object owned by this context. OMPTraitInfo &getNewOMPTraitInfo(); - /// Whether a C++ static variable may be externalized. - bool mayExternalizeStaticVar(const Decl *D) const; + /// Whether a C++ static variable or CUDA/HIP kernel may be externalized. + bool mayExternalize(const Decl *D) const; - /// Whether a C++ static variable should be externalized. - bool shouldExternalizeStaticVar(const Decl *D) const; + /// Whether a C++ static variable or CUDA/HIP kernel should be externalized. + bool shouldExternalize(const Decl *D) const; StringRef getCUIDHash() const; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 5fa2d46de89b2..5a1297b80894c 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11331,7 +11331,7 @@ static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context, // name between the host and device compilation which is the same for the // same compilation unit whereas different among different compilation // units. - if (Context.shouldExternalizeStaticVar(D)) + if (Context.shouldExternalize(D)) return GVA_StrongExternal; } return L; @@ -12258,7 +12258,7 @@ operator<<(const StreamingDiagnostic &DB, return DB << "a prior #pragma section"; } -bool ASTContext::mayExternalizeStaticVar(const Decl *D) const { +bool ASTContext::mayExternalize(const Decl *D) const { bool IsStaticVar = isa(D) && cast(D)->getStorageClass() == SC_Static; bool IsExplicitDeviceVar = (D->hasAttr() && @@ -12266,14 +12266,16 @@ bool ASTContext::mayExternalizeStaticVar(const Decl *D) const { (D->hasAttr() && !D->getAttr()->isImplicit()); // CUDA/HIP: static managed variables need to be externalized since it is - // a declaration in IR, therefore cannot have internal linkage. - return IsStaticVar && - (D->hasAttr() || IsExplicitDeviceVar); + // a declaration in IR, therefore cannot have internal linkage. Kernels in + // anonymous name space needs to be externalized to avoid duplicate symbols. + return (IsStaticVar && + (D->hasAttr() || IsExplicitDeviceVar)) || + (D->hasAttr() && D->isInAnonymousNamespace()); } -bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const { - return mayExternalizeStaticVar(D) && - (D->hasAttr() || +bool ASTContext::shouldExternalize(const Decl *D) const { + return mayExternalize(D) && + (D->hasAttr() || D->hasAttr() || CUDADeviceVarODRUsedByHost.count(cast(D))); } diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index c4e3f7f54f4f2..89ceea7408ca2 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -281,13 +281,13 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { DeviceSideName = std::string(ND->getIdentifier()->getName()); // Make unique name for device side static file-scope variable for HIP. - if (CGM.getContext().shouldExternalizeStaticVar(ND) && + if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && !CGM.getLangOpts().CUID.empty()) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << DeviceSideName; - CGM.printPostfixForExternalizedStaticVar(Out); + CGM.printPostfixForExternalizedDecl(Out, ND); DeviceSideName = std::string(Out.str()); } return DeviceSideName; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 29806b65e984e..6c4c8e984678d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1364,10 +1364,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, } // Make unique name for device side static file-scope variable for HIP. - if (CGM.getContext().shouldExternalizeStaticVar(ND) && + if (CGM.getContext().shouldExternalize(ND) && CGM.getLangOpts().GPURelocatableDeviceCode && CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty()) - CGM.printPostfixForExternalizedStaticVar(Out); + CGM.printPostfixForExternalizedDecl(Out, ND); return std::string(Out.str()); } @@ -1434,8 +1434,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { // static device variable depends on whether the variable is referenced by // a host or device host function. Therefore the mangled name cannot be // cached. - if (!LangOpts.CUDAIsDevice || - !getContext().mayExternalizeStaticVar(GD.getDecl())) { + if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) { auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; @@ -1455,7 +1454,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { // directly between host- and device-compilations, the host- and // device-mangling in host compilation could help catching certain ones. assert(!isa(ND) || !ND->hasAttr() || - getLangOpts().CUDAIsDevice || + getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice || (getContext().getAuxTargetInfo() && (getContext().getAuxTargetInfo()->getCXXABI() != getContext().getTargetInfo().getCXXABI())) || @@ -6645,7 +6644,8 @@ bool CodeGenModule::stopAutoInit() { return false; } -void CodeGenModule::printPostfixForExternalizedStaticVar( - llvm::raw_ostream &OS) const { - OS << "__static__" << getContext().getCUIDHash(); +void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, + const Decl *D) const { + OS << (isa(D) ? "__static__" : ".anon.") + << getContext().getCUIDHash(); } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 1fcd5d4d808a1..a8a63c8da57fc 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1447,9 +1447,10 @@ class CodeGenModule : public CodeGenTypeCache { TBAAAccessInfo *TBAAInfo = nullptr); bool stopAutoInit(); - /// Print the postfix for externalized static variable for single source - /// offloading languages CUDA and HIP. - void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const; + /// Print the postfix for externalized static variable or kernels for single + /// source offloading languages CUDA and HIP. + void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, + const Decl *D) const; private: llvm::Constant *GetOrCreateLLVMFunction( diff --git a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu new file mode 100644 index 0000000000000..4243cec796a86 --- /dev/null +++ b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - -x hip %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]]( +// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00" +// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]] + +namespace { +__global__ void kernel() { +} +} + +void test() { + kernel<<<1, 1>>>(); +} From 906395c7cf20cbd386b785a3e4abb86629fb775c Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Thu, 21 Apr 2022 11:44:11 -0400 Subject: [PATCH 2/3] [CUDA][HIP] Externalize kernels with internal linkage This patch is a continuation of https://reviews.llvm.org/D123353. Not only kernels in anonymous namespace, but also template kernels with template arguments in anonymous namespace need to be externalized. To be more generic, this patch checks the linkage of a kernel assuming the kernel does not have __global__ attribute. If the linkage is internal then clang will externalize it. This patch also fixes the postfix for externalized symbol since nvptx does not allow '.' in symbol name. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D124189 Fixes: https://github.com/llvm/llvm-project/issues/54560 --- clang/lib/AST/ASTContext.cpp | 4 +- clang/lib/CodeGen/CodeGenModule.cpp | 10 +++- clang/test/CodeGenCUDA/device-var-linkage.cu | 31 +++++++++---- clang/test/CodeGenCUDA/kernel-in-anon-ns.cu | 46 ++++++++++++++++--- clang/test/CodeGenCUDA/managed-var.cu | 12 ++--- .../test/CodeGenCUDA/static-device-var-rdc.cu | 18 +++++--- 6 files changed, 91 insertions(+), 30 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 5a1297b80894c..b1bcd2e07c190 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12270,7 +12270,9 @@ bool ASTContext::mayExternalize(const Decl *D) const { // anonymous name space needs to be externalized to avoid duplicate symbols. return (IsStaticVar && (D->hasAttr() || IsExplicitDeviceVar)) || - (D->hasAttr() && D->isInAnonymousNamespace()); + (D->hasAttr() && + basicGVALinkageForFunction(*this, cast(D)) == + GVA_Internal); } bool ASTContext::shouldExternalize(const Decl *D) const { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 6c4c8e984678d..776a5f2ef7291 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6646,6 +6646,12 @@ bool CodeGenModule::stopAutoInit() { void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const { - OS << (isa(D) ? "__static__" : ".anon.") - << getContext().getCUIDHash(); + StringRef Tag; + // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers + // postfix beginning with '.' since the symbol name can be demangled. + if (LangOpts.HIP) + Tag = (isa(D) ? ".static." : ".intern."); + else + Tag = (isa(D) ? "__static__" : "__intern__"); + OS << Tag << getContext().getCUIDHash(); } diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu index d830802c82061..2c3f6023acae8 100644 --- a/clang/test/CodeGenCUDA/device-var-linkage.cu +++ b/clang/test/CodeGenCUDA/device-var-linkage.cu @@ -1,15 +1,18 @@ -// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,NORDC %s -// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=DEV,RDC %s -// RUN: %clang_cc1 -triple nvptx \ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ // RUN: -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,NORDC-H %s -// RUN: %clang_cc1 -triple nvptx \ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ // RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - -x hip %s \ // RUN: | FileCheck -check-prefixes=HOST,RDC-H %s +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -fgpu-rdc -cuid=abc -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CUDA %s #include "Inputs/cuda.h" @@ -24,7 +27,9 @@ __constant__ int v2; // DEV-DAG: @v3 = addrspace(1) externally_initialized global i32 addrspace(1)* null // NORDC-H-DAG: @v3 = internal externally_initialized global i32* null // RDC-H-DAG: @v3 = externally_initialized global i32* null +#if __HIP__ __managed__ int v3; +#endif // DEV-DAG: @ev1 = external addrspace(1) global i32 // HOST-DAG: @ev1 = external global i32 @@ -34,25 +39,35 @@ extern __device__ int ev1; extern __constant__ int ev2; // DEV-DAG: @ev3 = external addrspace(1) externally_initialized global i32 addrspace(1)* // HOST-DAG: @ev3 = external externally_initialized global i32* +#if __HIP__ extern __managed__ int ev3; +#endif // NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv1 = internal global i32 undef +// CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 static __device__ int sv1; // NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0 -// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 +// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 // HOST-DAG: @_ZL3sv2 = internal global i32 undef +// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 static __constant__ int sv2; // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null +#if __HIP__ static __managed__ int sv3; +#endif __device__ __host__ int work(int *x); __device__ __host__ int fun1() { - return work(&ev1) + work(&ev2) + work(&ev3) + work(&sv1) + work(&sv2) + work(&sv3); + return work(&ev1) + work(&ev2) + work(&sv1) + work(&sv2) +#if __HIP__ + + work(&ev3) + work(&sv3) +#endif + ; } // HOST: hipRegisterVar({{.*}}@v1 diff --git a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu index 4243cec796a86..bc753d76e5c11 100644 --- a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu +++ b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu @@ -6,19 +6,53 @@ // RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \ // RUN: -emit-llvm -o - -x hip %s > %t.host -// RUN: cat %t.dev %t.host | FileCheck %s +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s + +// RUN: echo "GPU binary" > %t.fatbin + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \ +// RUN: -emit-llvm -o - %s > %t.dev + +// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \ +// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \ +// RUN: -emit-llvm -o - %s > %t.host + +// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s #include "Inputs/cuda.h" -// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]]( -// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00" -// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]] +// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]]( +// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]]( + +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]]( +// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]]( + +// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00" +// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00" +// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00" + +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]] +// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]] + + +template +__global__ void tempKern(T x) {} namespace { -__global__ void kernel() { -} + __global__ void kernel() {} + struct X {}; + X x; + auto lambda = [](){}; } void test() { kernel<<<1, 1>>>(); + + tempKern<<<1, 1>>>(x); + + tempKern<<<1, 1>>>(lambda); } diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu index 96657f0f7a131..3b8540377ab22 100644 --- a/clang/test/CodeGenCUDA/managed-var.cu +++ b/clang/test/CodeGenCUDA/managed-var.cu @@ -1,5 +1,3 @@ -// REQUIRES: x86-registered-target, amdgpu-registered-target - // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \ // RUN: -emit-llvm -o - -x hip %s | FileCheck \ // RUN: -check-prefixes=COMMON,DEV,NORDC-D %s @@ -52,15 +50,15 @@ extern __managed__ int ex; // NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4 // NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null -// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 -// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4 +// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null // HOST-DAG: @_ZL2sx.managed = internal global i32 1 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null // NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00" -// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00" +// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00" -// POSTFIX: @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null -// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00" +// POSTFIX: @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null +// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00" static __managed__ int sx = 1; // DEV-DAG: @llvm.compiler.used diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu index bb750bd91a928..f2f2c5ff290ad 100644 --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -40,6 +40,11 @@ // RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host // RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s +// Check postfix for CUDA. + +// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \ +// RUN: -check-prefixes=CUDA %s #include "Inputs/cuda.h" @@ -55,11 +60,12 @@ // INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00" +// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00" +// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 -// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00" +// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0 +// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00" static __device__ int x; @@ -73,8 +79,8 @@ static __device__ int x2; // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" // Test externalized static device variables -// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0 -// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00" +// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0 +// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00" static __constant__ int y; From fda16285fb0bb4124d6dffb392d0c3b4abd6bc6e Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 24 May 2022 14:52:19 -0400 Subject: [PATCH 3/3] Update static-device-var-rdc.cu --- clang/test/CodeGenCUDA/static-device-var-rdc.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu index f2f2c5ff290ad..56ec2faf1e1ad 100644 --- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu +++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu @@ -42,7 +42,7 @@ // Check postfix for CUDA. -// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device -cuid=abc \ +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \ // RUN: -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \ // RUN: -check-prefixes=CUDA %s