From e15ac50231ddd6fb0a68bae5c9351f5f93114e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadej=20Ciglari=C4=8D?= Date: Mon, 17 Jan 2022 13:19:26 +0100 Subject: [PATCH] [SYCL][CUDA][libclc] Add atomic loads and stores with various memory orders and scopes (#5191) Adds atomic loads and stores with various memory orders and scopes. These are implemented by adding intrinsics and builtins for PTX loads and stores. Tests for this are here: https://github.com/intel/llvm-test-suite/pull/648 --- clang/include/clang/Basic/BuiltinsNVPTX.def | 86 +++ clang/lib/CodeGen/CGBuiltin.cpp | 94 +++ clang/test/CodeGen/builtins-nvptx.c | 625 ++++++++++++++++ libclc/ptx-nvidiacl/libspirv/SOURCES | 2 + .../libspirv/atomic/atomic_load.cl | 84 +++ .../libspirv/atomic/atomic_store.cl | 85 +++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 42 ++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 151 +++- .../CodeGen/NVPTX/atomics-with-semantics.ll | 671 ++++++++++++++++++ 9 files changed, 1838 insertions(+), 2 deletions(-) create mode 100644 libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl create mode 100644 libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index bb67c1abf2a62..e701b3de3d1d1 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -2037,6 +2037,92 @@ TARGET_BUILTIN(__nvvm_atom_acq_rel_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70) TARGET_BUILTIN(__nvvm_atom_acq_rel_cta_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70) TARGET_BUILTIN(__nvvm_atom_acq_rel_sys_cas_shared_ll, "LLiLLiD*LLiLLi", "n", SM_70) +#pragma push_macro("LD_VOLATILE_BUILTIN_TYPES") +#define LD_VOLATILE_BUILTIN_TYPES(ADDR_SPACE) \ +BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_i, "iiD*", "n") \ +BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_l, "LiLiD*", "n") \ +BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_ll, "LLiLLiD*", "n") \ +BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_f, "ffD*", "n") \ +BUILTIN(__nvvm_volatile_ld##ADDR_SPACE##_d, "ddD*", "n") + +#pragma push_macro("LD_BUILTIN_TYPES") +#define LD_BUILTIN_TYPES(ORDER, SCOPE, ADDR_SPACE) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_i, "iiD*", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_l, "LiLiD*", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_ll, "LLiLLiD*", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_f, "ffD*", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_d, "ddD*", "n", SM_70) + +#pragma push_macro("LD_BUILTIN_AS_TYPES") +#define LD_BUILTIN_AS_TYPES(ORDER, SCOPE) \ +LD_BUILTIN_TYPES(ORDER, SCOPE, _gen) \ +LD_BUILTIN_TYPES(ORDER, SCOPE, _global) \ +LD_BUILTIN_TYPES(ORDER, SCOPE, _shared) + +#pragma push_macro("LD_BUILTIN_SCOPES_AS_TYPES") +#define LD_BUILTIN_SCOPES_AS_TYPES(ORDER) \ +LD_BUILTIN_AS_TYPES(ORDER, ) \ +LD_BUILTIN_AS_TYPES(ORDER, _cta) \ +LD_BUILTIN_AS_TYPES(ORDER, _sys) + +LD_BUILTIN_SCOPES_AS_TYPES() +LD_BUILTIN_SCOPES_AS_TYPES(_acquire) +LD_VOLATILE_BUILTIN_TYPES(_gen) +LD_VOLATILE_BUILTIN_TYPES(_global) +LD_VOLATILE_BUILTIN_TYPES(_shared) + +#undef LD_VOLATILE_BUILTIN_TYPES +#pragma pop_macro("LD_VOLATILE_BUILTIN_TYPES") +#undef LD_BUILTIN_TYPES +#pragma pop_macro("LD_BUILTIN_TYPES") +#undef LD_BUILTIN_AS_TYPES +#pragma pop_macro("LD_BUILTIN_AS_TYPES") +#undef LD_BUILTIN_SCOPES_AS_TYPES +#pragma pop_macro("LD_BUILTIN_SCOPES_AS_TYPES") + +#pragma push_macro("ST_VOLATILE_BUILTIN_TYPES") +#define ST_VOLATILE_BUILTIN_TYPES(ADDR_SPACE) \ +BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_i, "viD*i", "n") \ +BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_l, "vLiD*Li", "n") \ +BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_ll, "vLLiD*LLi", "n") \ +BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_f, "vfD*f", "n") \ +BUILTIN(__nvvm_volatile_st##ADDR_SPACE##_d, "vdD*d", "n") + +#pragma push_macro("ST_BUILTIN_TYPES") +#define ST_BUILTIN_TYPES(ORDER, SCOPE, ADDR_SPACE) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_i, "viD*i", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_l, "vLiD*Li", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_ll, "vLLiD*LLi", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_f, "vfD*f", "n", SM_70) \ +TARGET_BUILTIN(__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_d, "vdD*d", "n", SM_70) + +#pragma push_macro("ST_BUILTIN_AS_TYPES") +#define ST_BUILTIN_AS_TYPES(ORDER, SCOPE) \ +ST_BUILTIN_TYPES(ORDER, SCOPE, _gen) \ +ST_BUILTIN_TYPES(ORDER, SCOPE, _global) \ +ST_BUILTIN_TYPES(ORDER, SCOPE, _shared) + +#pragma push_macro("ST_BUILTIN_SCOPES_AS_TYPES") +#define ST_BUILTIN_SCOPES_AS_TYPES(ORDER) \ +ST_BUILTIN_AS_TYPES(ORDER, ) \ +ST_BUILTIN_AS_TYPES(ORDER, _cta) \ +ST_BUILTIN_AS_TYPES(ORDER, _sys) + +ST_BUILTIN_SCOPES_AS_TYPES() +ST_BUILTIN_SCOPES_AS_TYPES(_release) +ST_VOLATILE_BUILTIN_TYPES(_gen) +ST_VOLATILE_BUILTIN_TYPES(_global) +ST_VOLATILE_BUILTIN_TYPES(_shared) + +#undef ST_VOLATILE_BUILTIN_TYPES +#pragma pop_macro("ST_VOLATILE_BUILTIN_TYPES") +#undef ST_BUILTIN_TYPES +#pragma pop_macro("ST_BUILTIN_TYPES") +#undef ST_BUILTIN_AS_TYPES +#pragma pop_macro("ST_BUILTIN_AS_TYPES") +#undef ST_BUILTIN_SCOPES_AS_TYPES +#pragma pop_macro("ST_BUILTIN_SCOPES_AS_TYPES") + // Compiler Error Warn BUILTIN(__nvvm_compiler_error, "vcC*4", "n") BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0a257f94aabea..c60e4201823ba 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17492,6 +17492,21 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Ptr->getType()}), {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); }; + auto MakeScopedLd = [&](unsigned IntrinsicID) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), + Ptr->getType()}), + {Ptr}); + }; + auto MakeScopedSt = [&](unsigned IntrinsicID) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic( + IntrinsicID, + {Ptr->getType(), Ptr->getType()->getPointerElementType()}), + {Ptr, EmitScalarExpr(E->getArg(1))}); + }; auto MakeScopedAtomic = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( @@ -17507,6 +17522,85 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); }; switch (BuiltinID) { + +#define LD_VOLATILE_CASES(ADDR_SPACE) \ + case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_i: \ + case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_l: \ + case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_ll: \ + return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i_volatile); \ + case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_f: \ + case NVPTX::BI__nvvm_volatile_ld##ADDR_SPACE##_d: \ + return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f_volatile); + +#define LD_CASES(ORDER, SCOPE, ADDR_SPACE) \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_i: \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_l: \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_ll: \ + return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_i##ORDER##SCOPE); \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_f: \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_ld##ADDR_SPACE##_d: \ + return MakeScopedLd(Intrinsic::nvvm_ld##ADDR_SPACE##_f##ORDER##SCOPE); + +#define LD_CASES_AS(ORDER, SCOPE) \ + LD_CASES(ORDER, SCOPE, _gen) \ + LD_CASES(ORDER, SCOPE, _global) \ + LD_CASES(ORDER, SCOPE, _shared) + +#define LD_CASES_AS_SCOPES(ORDER) \ + LD_CASES_AS(ORDER, ) \ + LD_CASES_AS(ORDER, _cta) \ + LD_CASES_AS(ORDER, _sys) + + LD_CASES_AS_SCOPES() + LD_CASES_AS_SCOPES(_acquire) + LD_VOLATILE_CASES(_gen) + LD_VOLATILE_CASES(_global) + LD_VOLATILE_CASES(_shared) + +#undef LD_VOLATILE_CASES +#undef LD_CASES +#undef LD_CASES_AS +#undef LD_CASES_AS_SCOPES + +#define ST_VOLATILE_CASES(ADDR_SPACE) \ + case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_i: \ + case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_l: \ + case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_ll: \ + return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i_volatile); \ + case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_f: \ + case NVPTX::BI__nvvm_volatile_st##ADDR_SPACE##_d: \ + return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f_volatile); + +#define ST_CASES(ORDER, SCOPE, ADDR_SPACE) \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_i: \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_l: \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_ll: \ + return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_i##ORDER##SCOPE); \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_f: \ + case NVPTX::BI__nvvm##ORDER##SCOPE##_st##ADDR_SPACE##_d: \ + return MakeScopedSt(Intrinsic::nvvm_st##ADDR_SPACE##_f##ORDER##SCOPE); + +#define ST_CASES_AS(ORDER, SCOPE) \ + ST_CASES(ORDER, SCOPE, _gen) \ + ST_CASES(ORDER, SCOPE, _global) \ + ST_CASES(ORDER, SCOPE, _shared) + +#define ST_CASES_AS_SCOPES(ORDER) \ + ST_CASES_AS(ORDER, ) \ + ST_CASES_AS(ORDER, _cta) \ + ST_CASES_AS(ORDER, _sys) + + ST_CASES_AS_SCOPES() + ST_CASES_AS_SCOPES(_release) + ST_VOLATILE_CASES(_gen) + ST_VOLATILE_CASES(_global) + ST_VOLATILE_CASES(_shared) + +#undef ST_VOLATILE_CASES +#undef ST_CASES +#undef ST_CASES_AS +#undef ST_CASES_AS_SCOPES + case NVPTX::BI__nvvm_atom_add_gen_i: case NVPTX::BI__nvvm_atom_add_gen_l: case NVPTX::BI__nvvm_atom_add_gen_ll: diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 9f1b79a3856d7..a1c5ecfe203aa 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -294,6 +294,81 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip, // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32 __nvvm_atom_dec_gen_ui(uip, ui); + // CHECK: call i32 @llvm.nvvm.ld.gen.i.volatile.i32.p0i32 + __nvvm_volatile_ld_gen_i(ip); + // CHECK: call i32 @llvm.nvvm.ld.global.i.volatile.i32.p1i32 + __nvvm_volatile_ld_global_i((__attribute__((address_space(1))) int *)ip); + // CHECK: call i32 @llvm.nvvm.ld.shared.i.volatile.i32.p3i32 + __nvvm_volatile_ld_shared_i((__attribute__((address_space(3))) int *)ip); + + // CHECK: call void @llvm.nvvm.st.gen.i.volatile.p0i32.i32 + __nvvm_volatile_st_gen_i(ip, i); + // CHECK: call void @llvm.nvvm.st.global.i.volatile.p1i32.i32 + __nvvm_volatile_st_global_i((__attribute__((address_space(1))) int *)ip, i); + // CHECK: call void @llvm.nvvm.st.shared.i.volatile.p3i32.i32 + __nvvm_volatile_st_shared_i((__attribute__((address_space(3))) int *)ip, i); + + // LP32: call i32 @llvm.nvvm.ld.gen.i.volatile.i32.p0i32 + // LP64: call i64 @llvm.nvvm.ld.gen.i.volatile.i64.p0i64 + __nvvm_volatile_ld_gen_l(&dl); + // LP32: call i32 @llvm.nvvm.ld.global.i.volatile.i32.p1i32 + // LP64: call i64 @llvm.nvvm.ld.global.i.volatile.i64.p1i64 + __nvvm_volatile_ld_global_l((__attribute__((address_space(1))) long *)&dl); + // LP32: call i32 @llvm.nvvm.ld.shared.i.volatile.i32.p3i32 + // LP64: call i64 @llvm.nvvm.ld.shared.i.volatile.i64.p3i64 + __nvvm_volatile_ld_shared_l((__attribute__((address_space(3))) long *)&dl); + + // LP32: call void @llvm.nvvm.st.gen.i.volatile.p0i32.i32 + // LP64: call void @llvm.nvvm.st.gen.i.volatile.p0i64.i64 + __nvvm_volatile_st_gen_l(&dl, l); + // LP32: call void @llvm.nvvm.st.global.i.volatile.p1i32.i32 + // LP64: call void @llvm.nvvm.st.global.i.volatile.p1i64.i64 + __nvvm_volatile_st_global_l((__attribute__((address_space(1))) long *)&dl, l); + // LP32: call void @llvm.nvvm.st.shared.i.volatile.p3i32.i32 + // LP64: call void @llvm.nvvm.st.shared.i.volatile.p3i64.i64 + __nvvm_volatile_st_shared_l((__attribute__((address_space(3))) long *)&dl, l); + + // CHECK: call i64 @llvm.nvvm.ld.gen.i.volatile.i64.p0i64 + __nvvm_volatile_ld_gen_ll(&dll); + // CHECK: call i64 @llvm.nvvm.ld.global.i.volatile.i64.p1i64 + __nvvm_volatile_ld_global_ll((__attribute__((address_space(1))) long long *)&dll); + // CHECK: call i64 @llvm.nvvm.ld.shared.i.volatile.i64.p3i64 + __nvvm_volatile_ld_shared_ll((__attribute__((address_space(3))) long long *)&dll); + + // CHECK: call void @llvm.nvvm.st.gen.i.volatile.p0i64.i64 + __nvvm_volatile_st_gen_ll(&dll, ll); + // CHECK: call void @llvm.nvvm.st.global.i.volatile.p1i64.i64 + __nvvm_volatile_st_global_ll((__attribute__((address_space(1))) long long *)&dll, ll); + // CHECK: call void @llvm.nvvm.st.shared.i.volatile.p3i64.i64 + __nvvm_volatile_st_shared_ll((__attribute__((address_space(3))) long long *)&dll, ll); + + // CHECK: call float @llvm.nvvm.ld.gen.f.volatile.f32.p0f32 + __nvvm_volatile_ld_gen_f(fp); + // CHECK: call float @llvm.nvvm.ld.global.f.volatile.f32.p1f32 + __nvvm_volatile_ld_global_f((__attribute__((address_space(1))) float *)fp); + // CHECK: call float @llvm.nvvm.ld.shared.f.volatile.f32.p3f32 + __nvvm_volatile_ld_shared_f((__attribute__((address_space(3))) float *)fp); + + // CHECK: call void @llvm.nvvm.st.gen.f.volatile.p0f32.f32 + __nvvm_volatile_st_gen_f(fp, f); + // CHECK: call void @llvm.nvvm.st.global.f.volatile.p1f32.f32 + __nvvm_volatile_st_global_f((__attribute__((address_space(1))) float *)fp, f); + // CHECK: call void @llvm.nvvm.st.shared.f.volatile.p3f32.f32 + __nvvm_volatile_st_shared_f((__attribute__((address_space(3))) float *)fp, f); + + // CHECK: call double @llvm.nvvm.ld.gen.f.volatile.f64.p0f64 + __nvvm_volatile_ld_gen_d(dfp); + // CHECK: call double @llvm.nvvm.ld.global.f.volatile.f64.p1f64 + __nvvm_volatile_ld_global_d((__attribute__((address_space(1))) double *)dfp); + // CHECK: call double @llvm.nvvm.ld.shared.f.volatile.f64.p3f64 + __nvvm_volatile_ld_shared_d((__attribute__((address_space(3))) double *)dfp); + + // CHECK: call void @llvm.nvvm.st.gen.f.volatile.p0f64.f64 + __nvvm_volatile_st_gen_d(dfp, df); + // CHECK: call void @llvm.nvvm.st.global.f.volatile.p1f64.f64 + __nvvm_volatile_st_global_d((__attribute__((address_space(1))) double *)dfp, df); + // CHECK: call void @llvm.nvvm.st.shared.f.volatile.p3f64.f64 + __nvvm_volatile_st_shared_d((__attribute__((address_space(3))) double *)dfp, df); ////////////////////////////////////////////////////////////////// // Atomics with scope (only supported on sm_60+). @@ -548,6 +623,556 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df, int *ip, #if ERROR_CHECK || __CUDA_ARCH__ >= 700 + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.gen.i.i32.p0i32 + // expected-error@+1 {{'__nvvm_ld_gen_i' needs target feature sm_70}} + __nvvm_ld_gen_i(ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.global.i.i32.p1i32 + // expected-error@+1 {{'__nvvm_ld_global_i' needs target feature sm_70}} + __nvvm_ld_global_i((__attribute__((address_space(1))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.shared.i.i32.p3i32 + // expected-error@+1 {{'__nvvm_ld_shared_i' needs target feature sm_70}} + __nvvm_ld_shared_i((__attribute__((address_space(3))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.gen.i.acquire.i32.p0i32 + // expected-error@+1 {{'__nvvm_acquire_ld_gen_i' needs target feature sm_70}} + __nvvm_acquire_ld_gen_i(ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.global.i.acquire.i32.p1i32 + // expected-error@+1 {{'__nvvm_acquire_ld_global_i' needs target feature sm_70}} + __nvvm_acquire_ld_global_i((__attribute__((address_space(1))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.shared.i.acquire.i32.p3i32 + // expected-error@+1 {{'__nvvm_acquire_ld_shared_i' needs target feature sm_70}} + __nvvm_acquire_ld_shared_i((__attribute__((address_space(3))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.gen.i.sys.i32.p0i32 + // expected-error@+1 {{'__nvvm_sys_ld_gen_i' needs target feature sm_70}} + __nvvm_sys_ld_gen_i(ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.global.i.sys.i32.p1i32 + // expected-error@+1 {{'__nvvm_sys_ld_global_i' needs target feature sm_70}} + __nvvm_sys_ld_global_i((__attribute__((address_space(1))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.shared.i.sys.i32.p3i32 + // expected-error@+1 {{'__nvvm_sys_ld_shared_i' needs target feature sm_70}} + __nvvm_sys_ld_shared_i((__attribute__((address_space(3))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.gen.i.acquire.sys.i32.p0i32 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_gen_i' needs target feature sm_70}} + __nvvm_acquire_sys_ld_gen_i(ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.global.i.acquire.sys.i32.p1i32 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_global_i' needs target feature sm_70}} + __nvvm_acquire_sys_ld_global_i((__attribute__((address_space(1))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.shared.i.acquire.sys.i32.p3i32 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_shared_i' needs target feature sm_70}} + __nvvm_acquire_sys_ld_shared_i((__attribute__((address_space(3))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.gen.i.cta.i32.p0i32 + // expected-error@+1 {{'__nvvm_cta_ld_gen_i' needs target feature sm_70}} + __nvvm_cta_ld_gen_i(ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.global.i.cta.i32.p1i32 + // expected-error@+1 {{'__nvvm_cta_ld_global_i' needs target feature sm_70}} + __nvvm_cta_ld_global_i((__attribute__((address_space(1))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.shared.i.cta.i32.p3i32 + // expected-error@+1 {{'__nvvm_cta_ld_shared_i' needs target feature sm_70}} + __nvvm_cta_ld_shared_i((__attribute__((address_space(3))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.gen.i.acquire.cta.i32.p0i32 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_gen_i' needs target feature sm_70}} + __nvvm_acquire_cta_ld_gen_i(ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.global.i.acquire.cta.i32.p1i32 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_global_i' needs target feature sm_70}} + __nvvm_acquire_cta_ld_global_i((__attribute__((address_space(1))) int *)ip); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.ld.shared.i.acquire.cta.i32.p3i32 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_shared_i' needs target feature sm_70}} + __nvvm_acquire_cta_ld_shared_i((__attribute__((address_space(3))) int *)ip); + + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.p0i32.i32 + // expected-error@+1 {{'__nvvm_st_gen_i' needs target feature sm_70}} + __nvvm_st_gen_i(ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.p1i32.i32 + // expected-error@+1 {{'__nvvm_st_global_i' needs target feature sm_70}} + __nvvm_st_global_i((__attribute__((address_space(1))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.p3i32.i32 + // expected-error@+1 {{'__nvvm_st_shared_i' needs target feature sm_70}} + __nvvm_st_shared_i((__attribute__((address_space(3))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.p0i32.i32 + // expected-error@+1 {{'__nvvm_release_st_gen_i' needs target feature sm_70}} + __nvvm_release_st_gen_i(ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.p1i32.i32 + // expected-error@+1 {{'__nvvm_release_st_global_i' needs target feature sm_70}} + __nvvm_release_st_global_i((__attribute__((address_space(1))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.p3i32.i32 + // expected-error@+1 {{'__nvvm_release_st_shared_i' needs target feature sm_70}} + __nvvm_release_st_shared_i((__attribute__((address_space(3))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.sys.p0i32.i32 + // expected-error@+1 {{'__nvvm_sys_st_gen_i' needs target feature sm_70}} + __nvvm_sys_st_gen_i(ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.sys.p1i32.i32 + // expected-error@+1 {{'__nvvm_sys_st_global_i' needs target feature sm_70}} + __nvvm_sys_st_global_i((__attribute__((address_space(1))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.sys.p3i32.i32 + // expected-error@+1 {{'__nvvm_sys_st_shared_i' needs target feature sm_70}} + __nvvm_sys_st_shared_i((__attribute__((address_space(3))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.sys.p0i32.i32 + // expected-error@+1 {{'__nvvm_release_sys_st_gen_i' needs target feature sm_70}} + __nvvm_release_sys_st_gen_i(ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.sys.p1i32.i32 + // expected-error@+1 {{'__nvvm_release_sys_st_global_i' needs target feature sm_70}} + __nvvm_release_sys_st_global_i((__attribute__((address_space(1))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.sys.p3i32.i32 + // expected-error@+1 {{'__nvvm_release_sys_st_shared_i' needs target feature sm_70}} + __nvvm_release_sys_st_shared_i((__attribute__((address_space(3))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.cta.p0i32.i32 + // expected-error@+1 {{'__nvvm_cta_st_gen_i' needs target feature sm_70}} + __nvvm_cta_st_gen_i(ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.cta.p1i32.i32 + // expected-error@+1 {{'__nvvm_cta_st_global_i' needs target feature sm_70}} + __nvvm_cta_st_global_i((__attribute__((address_space(1))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.cta.p3i32.i32 + // expected-error@+1 {{'__nvvm_cta_st_shared_i' needs target feature sm_70}} + __nvvm_cta_st_shared_i((__attribute__((address_space(3))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.cta.p0i32.i32 + // expected-error@+1 {{'__nvvm_release_cta_st_gen_i' needs target feature sm_70}} + __nvvm_release_cta_st_gen_i(ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.cta.p1i32.i32 + // expected-error@+1 {{'__nvvm_release_cta_st_global_i' needs target feature sm_70}} + __nvvm_release_cta_st_global_i((__attribute__((address_space(1))) int *)ip, i); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.cta.p3i32.i32 + // expected-error@+1 {{'__nvvm_release_cta_st_shared_i' needs target feature sm_70}} + __nvvm_release_cta_st_shared_i((__attribute__((address_space(3))) int *)ip, i); + + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.i64.p0i64 + // expected-error@+1 {{'__nvvm_ld_gen_l' needs target feature sm_70}} + __nvvm_ld_gen_l(&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.i64.p1i64 + // expected-error@+1 {{'__nvvm_ld_global_l' needs target feature sm_70}} + __nvvm_ld_global_l((__attribute__((address_space(1))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.i64.p3i64 + // expected-error@+1 {{'__nvvm_ld_shared_l' needs target feature sm_70}} + __nvvm_ld_shared_l((__attribute__((address_space(3))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.acquire.i64.p0i64 + // expected-error@+1 {{'__nvvm_acquire_ld_gen_l' needs target feature sm_70}} + __nvvm_acquire_ld_gen_l(&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.acquire.i64.p1i64 + // expected-error@+1 {{'__nvvm_acquire_ld_global_l' needs target feature sm_70}} + __nvvm_acquire_ld_global_l((__attribute__((address_space(1))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.acquire.i64.p3i64 + // expected-error@+1 {{'__nvvm_acquire_ld_shared_l' needs target feature sm_70}} + __nvvm_acquire_ld_shared_l((__attribute__((address_space(3))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.sys.i64.p0i64 + // expected-error@+1 {{'__nvvm_sys_ld_gen_l' needs target feature sm_70}} + __nvvm_sys_ld_gen_l(&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.sys.i64.p1i64 + // expected-error@+1 {{'__nvvm_sys_ld_global_l' needs target feature sm_70}} + __nvvm_sys_ld_global_l((__attribute__((address_space(1))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.sys.i64.p3i64 + // expected-error@+1 {{'__nvvm_sys_ld_shared_l' needs target feature sm_70}} + __nvvm_sys_ld_shared_l((__attribute__((address_space(3))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.acquire.sys.i64.p0i64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_gen_l' needs target feature sm_70}} + __nvvm_acquire_sys_ld_gen_l(&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.acquire.sys.i64.p1i64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_global_l' needs target feature sm_70}} + __nvvm_acquire_sys_ld_global_l((__attribute__((address_space(1))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.acquire.sys.i64.p3i64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_shared_l' needs target feature sm_70}} + __nvvm_acquire_sys_ld_shared_l((__attribute__((address_space(3))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.cta.i64.p0i64 + // expected-error@+1 {{'__nvvm_cta_ld_gen_l' needs target feature sm_70}} + __nvvm_cta_ld_gen_l(&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.cta.i64.p1i64 + // expected-error@+1 {{'__nvvm_cta_ld_global_l' needs target feature sm_70}} + __nvvm_cta_ld_global_l((__attribute__((address_space(1))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.cta.i64.p3i64 + // expected-error@+1 {{'__nvvm_cta_ld_shared_l' needs target feature sm_70}} + __nvvm_cta_ld_shared_l((__attribute__((address_space(3))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.acquire.cta.i64.p0i64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_gen_l' needs target feature sm_70}} + __nvvm_acquire_cta_ld_gen_l(&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.acquire.cta.i64.p1i64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_global_l' needs target feature sm_70}} + __nvvm_acquire_cta_ld_global_l((__attribute__((address_space(1))) long *)&dl); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.acquire.cta.i64.p3i64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_shared_l' needs target feature sm_70}} + __nvvm_acquire_cta_ld_shared_l((__attribute__((address_space(3))) long *)&dl); + + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.p0i64.i64 + // expected-error@+1 {{'__nvvm_st_gen_l' needs target feature sm_70}} + __nvvm_st_gen_l(&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.p1i64.i64 + // expected-error@+1 {{'__nvvm_st_global_l' needs target feature sm_70}} + __nvvm_st_global_l((__attribute__((address_space(1))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.p3i64.i64 + // expected-error@+1 {{'__nvvm_st_shared_l' needs target feature sm_70}} + __nvvm_st_shared_l((__attribute__((address_space(3))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.p0i64.i64 + // expected-error@+1 {{'__nvvm_release_st_gen_l' needs target feature sm_70}} + __nvvm_release_st_gen_l(&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.p1i64.i64 + // expected-error@+1 {{'__nvvm_release_st_global_l' needs target feature sm_70}} + __nvvm_release_st_global_l((__attribute__((address_space(1))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.p3i64.i64 + // expected-error@+1 {{'__nvvm_release_st_shared_l' needs target feature sm_70}} + __nvvm_release_st_shared_l((__attribute__((address_space(3))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.sys.p0i64.i64 + // expected-error@+1 {{'__nvvm_sys_st_gen_l' needs target feature sm_70}} + __nvvm_sys_st_gen_l(&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.sys.p1i64.i64 + // expected-error@+1 {{'__nvvm_sys_st_global_l' needs target feature sm_70}} + __nvvm_sys_st_global_l((__attribute__((address_space(1))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.sys.p3i64.i64 + // expected-error@+1 {{'__nvvm_sys_st_shared_l' needs target feature sm_70}} + __nvvm_sys_st_shared_l((__attribute__((address_space(3))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.sys.p0i64.i64 + // expected-error@+1 {{'__nvvm_release_sys_st_gen_l' needs target feature sm_70}} + __nvvm_release_sys_st_gen_l(&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.sys.p1i64.i64 + // expected-error@+1 {{'__nvvm_release_sys_st_global_l' needs target feature sm_70}} + __nvvm_release_sys_st_global_l((__attribute__((address_space(1))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.sys.p3i64.i64 + // expected-error@+1 {{'__nvvm_release_sys_st_shared_l' needs target feature sm_70}} + __nvvm_release_sys_st_shared_l((__attribute__((address_space(3))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.cta.p0i64.i64 + // expected-error@+1 {{'__nvvm_cta_st_gen_l' needs target feature sm_70}} + __nvvm_cta_st_gen_l(&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.cta.p1i64.i64 + // expected-error@+1 {{'__nvvm_cta_st_global_l' needs target feature sm_70}} + __nvvm_cta_st_global_l((__attribute__((address_space(1))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.cta.p3i64.i64 + // expected-error@+1 {{'__nvvm_cta_st_shared_l' needs target feature sm_70}} + __nvvm_cta_st_shared_l((__attribute__((address_space(3))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.cta.p0i64.i64 + // expected-error@+1 {{'__nvvm_release_cta_st_gen_l' needs target feature sm_70}} + __nvvm_release_cta_st_gen_l(&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.cta.p1i64.i64 + // expected-error@+1 {{'__nvvm_release_cta_st_global_l' needs target feature sm_70}} + __nvvm_release_cta_st_global_l((__attribute__((address_space(1))) long *)&dl, l); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.cta.p3i64.i64 + // expected-error@+1 {{'__nvvm_release_cta_st_shared_l' needs target feature sm_70}} + __nvvm_release_cta_st_shared_l((__attribute__((address_space(3))) long *)&dl, l); + + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.i64.p0i64 + // expected-error@+1 {{'__nvvm_ld_gen_ll' needs target feature sm_70}} + __nvvm_ld_gen_ll(&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.i64.p1i64 + // expected-error@+1 {{'__nvvm_ld_global_ll' needs target feature sm_70}} + __nvvm_ld_global_ll((__attribute__((address_space(1))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.i64.p3i64 + // expected-error@+1 {{'__nvvm_ld_shared_ll' needs target feature sm_70}} + __nvvm_ld_shared_ll((__attribute__((address_space(3))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.acquire.i64.p0i64 + // expected-error@+1 {{'__nvvm_acquire_ld_gen_ll' needs target feature sm_70}} + __nvvm_acquire_ld_gen_ll(&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.acquire.i64.p1i64 + // expected-error@+1 {{'__nvvm_acquire_ld_global_ll' needs target feature sm_70}} + __nvvm_acquire_ld_global_ll((__attribute__((address_space(1))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.acquire.i64.p3i64 + // expected-error@+1 {{'__nvvm_acquire_ld_shared_ll' needs target feature sm_70}} + __nvvm_acquire_ld_shared_ll((__attribute__((address_space(3))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.sys.i64.p0i64 + // expected-error@+1 {{'__nvvm_sys_ld_gen_ll' needs target feature sm_70}} + __nvvm_sys_ld_gen_ll(&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.sys.i64.p1i64 + // expected-error@+1 {{'__nvvm_sys_ld_global_ll' needs target feature sm_70}} + __nvvm_sys_ld_global_ll((__attribute__((address_space(1))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.sys.i64.p3i64 + // expected-error@+1 {{'__nvvm_sys_ld_shared_ll' needs target feature sm_70}} + __nvvm_sys_ld_shared_ll((__attribute__((address_space(3))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.acquire.sys.i64.p0i64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_gen_ll' needs target feature sm_70}} + __nvvm_acquire_sys_ld_gen_ll(&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.acquire.sys.i64.p1i64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_global_ll' needs target feature sm_70}} + __nvvm_acquire_sys_ld_global_ll((__attribute__((address_space(1))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.acquire.sys.i64.p3i64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_shared_ll' needs target feature sm_70}} + __nvvm_acquire_sys_ld_shared_ll((__attribute__((address_space(3))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.cta.i64.p0i64 + // expected-error@+1 {{'__nvvm_cta_ld_gen_ll' needs target feature sm_70}} + __nvvm_cta_ld_gen_ll(&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.cta.i64.p1i64 + // expected-error@+1 {{'__nvvm_cta_ld_global_ll' needs target feature sm_70}} + __nvvm_cta_ld_global_ll((__attribute__((address_space(1))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.cta.i64.p3i64 + // expected-error@+1 {{'__nvvm_cta_ld_shared_ll' needs target feature sm_70}} + __nvvm_cta_ld_shared_ll((__attribute__((address_space(3))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.gen.i.acquire.cta.i64.p0i64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_gen_ll' needs target feature sm_70}} + __nvvm_acquire_cta_ld_gen_ll(&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.global.i.acquire.cta.i64.p1i64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_global_ll' needs target feature sm_70}} + __nvvm_acquire_cta_ld_global_ll((__attribute__((address_space(1))) long long *)&dll); + // CHECK_SM70_LP64: call i64 @llvm.nvvm.ld.shared.i.acquire.cta.i64.p3i64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_shared_ll' needs target feature sm_70}} + __nvvm_acquire_cta_ld_shared_ll((__attribute__((address_space(3))) long long *)&dll); + + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.p0i64.i64 + // expected-error@+1 {{'__nvvm_st_gen_ll' needs target feature sm_70}} + __nvvm_st_gen_ll(&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.p1i64.i64 + // expected-error@+1 {{'__nvvm_st_global_ll' needs target feature sm_70}} + __nvvm_st_global_ll((__attribute__((address_space(1))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.p3i64.i64 + // expected-error@+1 {{'__nvvm_st_shared_ll' needs target feature sm_70}} + __nvvm_st_shared_ll((__attribute__((address_space(3))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.p0i64.i64 + // expected-error@+1 {{'__nvvm_release_st_gen_ll' needs target feature sm_70}} + __nvvm_release_st_gen_ll(&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.p1i64.i64 + // expected-error@+1 {{'__nvvm_release_st_global_ll' needs target feature sm_70}} + __nvvm_release_st_global_ll((__attribute__((address_space(1))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.p3i64.i64 + // expected-error@+1 {{'__nvvm_release_st_shared_ll' needs target feature sm_70}} + __nvvm_release_st_shared_ll((__attribute__((address_space(3))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.sys.p0i64.i64 + // expected-error@+1 {{'__nvvm_sys_st_gen_ll' needs target feature sm_70}} + __nvvm_sys_st_gen_ll(&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.sys.p1i64.i64 + // expected-error@+1 {{'__nvvm_sys_st_global_ll' needs target feature sm_70}} + __nvvm_sys_st_global_ll((__attribute__((address_space(1))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.sys.p3i64.i64 + // expected-error@+1 {{'__nvvm_sys_st_shared_ll' needs target feature sm_70}} + __nvvm_sys_st_shared_ll((__attribute__((address_space(3))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.sys.p0i64.i64 + // expected-error@+1 {{'__nvvm_release_sys_st_gen_ll' needs target feature sm_70}} + __nvvm_release_sys_st_gen_ll(&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.sys.p1i64.i64 + // expected-error@+1 {{'__nvvm_release_sys_st_global_ll' needs target feature sm_70}} + __nvvm_release_sys_st_global_ll((__attribute__((address_space(1))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.sys.p3i64.i64 + // expected-error@+1 {{'__nvvm_release_sys_st_shared_ll' needs target feature sm_70}} + __nvvm_release_sys_st_shared_ll((__attribute__((address_space(3))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.cta.p0i64.i64 + // expected-error@+1 {{'__nvvm_cta_st_gen_ll' needs target feature sm_70}} + __nvvm_cta_st_gen_ll(&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.cta.p1i64.i64 + // expected-error@+1 {{'__nvvm_cta_st_global_ll' needs target feature sm_70}} + __nvvm_cta_st_global_ll((__attribute__((address_space(1))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.cta.p3i64.i64 + // expected-error@+1 {{'__nvvm_cta_st_shared_ll' needs target feature sm_70}} + __nvvm_cta_st_shared_ll((__attribute__((address_space(3))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.i.release.cta.p0i64.i64 + // expected-error@+1 {{'__nvvm_release_cta_st_gen_ll' needs target feature sm_70}} + __nvvm_release_cta_st_gen_ll(&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.i.release.cta.p1i64.i64 + // expected-error@+1 {{'__nvvm_release_cta_st_global_ll' needs target feature sm_70}} + __nvvm_release_cta_st_global_ll((__attribute__((address_space(1))) long long *)&dll, ll); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.i.release.cta.p3i64.i64 + // expected-error@+1 {{'__nvvm_release_cta_st_shared_ll' needs target feature sm_70}} + __nvvm_release_cta_st_shared_ll((__attribute__((address_space(3))) long long *)&dll, ll); + + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.gen.f.f32.p0f32 + // expected-error@+1 {{'__nvvm_ld_gen_f' needs target feature sm_70}} + __nvvm_ld_gen_f(fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.global.f.f32.p1f32 + // expected-error@+1 {{'__nvvm_ld_global_f' needs target feature sm_70}} + __nvvm_ld_global_f((__attribute__((address_space(1))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.shared.f.f32.p3f32 + // expected-error@+1 {{'__nvvm_ld_shared_f' needs target feature sm_70}} + __nvvm_ld_shared_f((__attribute__((address_space(3))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.gen.f.acquire.f32.p0f32 + // expected-error@+1 {{'__nvvm_acquire_ld_gen_f' needs target feature sm_70}} + __nvvm_acquire_ld_gen_f(fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.global.f.acquire.f32.p1f32 + // expected-error@+1 {{'__nvvm_acquire_ld_global_f' needs target feature sm_70}} + __nvvm_acquire_ld_global_f((__attribute__((address_space(1))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.shared.f.acquire.f32.p3f32 + // expected-error@+1 {{'__nvvm_acquire_ld_shared_f' needs target feature sm_70}} + __nvvm_acquire_ld_shared_f((__attribute__((address_space(3))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.gen.f.sys.f32.p0f32 + // expected-error@+1 {{'__nvvm_sys_ld_gen_f' needs target feature sm_70}} + __nvvm_sys_ld_gen_f(fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.global.f.sys.f32.p1f32 + // expected-error@+1 {{'__nvvm_sys_ld_global_f' needs target feature sm_70}} + __nvvm_sys_ld_global_f((__attribute__((address_space(1))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.shared.f.sys.f32.p3f32 + // expected-error@+1 {{'__nvvm_sys_ld_shared_f' needs target feature sm_70}} + __nvvm_sys_ld_shared_f((__attribute__((address_space(3))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.gen.f.acquire.sys.f32.p0f32 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_gen_f' needs target feature sm_70}} + __nvvm_acquire_sys_ld_gen_f(fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.global.f.acquire.sys.f32.p1f32 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_global_f' needs target feature sm_70}} + __nvvm_acquire_sys_ld_global_f((__attribute__((address_space(1))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.shared.f.acquire.sys.f32.p3f32 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_shared_f' needs target feature sm_70}} + __nvvm_acquire_sys_ld_shared_f((__attribute__((address_space(3))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.gen.f.cta.f32.p0f32 + // expected-error@+1 {{'__nvvm_cta_ld_gen_f' needs target feature sm_70}} + __nvvm_cta_ld_gen_f(fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.global.f.cta.f32.p1f32 + // expected-error@+1 {{'__nvvm_cta_ld_global_f' needs target feature sm_70}} + __nvvm_cta_ld_global_f((__attribute__((address_space(1))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.shared.f.cta.f32.p3f32 + // expected-error@+1 {{'__nvvm_cta_ld_shared_f' needs target feature sm_70}} + __nvvm_cta_ld_shared_f((__attribute__((address_space(3))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.gen.f.acquire.cta.f32.p0f32 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_gen_f' needs target feature sm_70}} + __nvvm_acquire_cta_ld_gen_f(fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.global.f.acquire.cta.f32.p1f32 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_global_f' needs target feature sm_70}} + __nvvm_acquire_cta_ld_global_f((__attribute__((address_space(1))) float *)fp); + // CHECK_SM70_LP64: call float @llvm.nvvm.ld.shared.f.acquire.cta.f32.p3f32 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_shared_f' needs target feature sm_70}} + __nvvm_acquire_cta_ld_shared_f((__attribute__((address_space(3))) float *)fp); + + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.p0f32.f32 + // expected-error@+1 {{'__nvvm_st_gen_f' needs target feature sm_70}} + __nvvm_st_gen_f(fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.p1f32.f32 + // expected-error@+1 {{'__nvvm_st_global_f' needs target feature sm_70}} + __nvvm_st_global_f((__attribute__((address_space(1))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.p3f32.f32 + // expected-error@+1 {{'__nvvm_st_shared_f' needs target feature sm_70}} + __nvvm_st_shared_f((__attribute__((address_space(3))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.release.p0f32.f32 + // expected-error@+1 {{'__nvvm_release_st_gen_f' needs target feature sm_70}} + __nvvm_release_st_gen_f(fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.release.p1f32.f32 + // expected-error@+1 {{'__nvvm_release_st_global_f' needs target feature sm_70}} + __nvvm_release_st_global_f((__attribute__((address_space(1))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.release.p3f32.f32 + // expected-error@+1 {{'__nvvm_release_st_shared_f' needs target feature sm_70}} + __nvvm_release_st_shared_f((__attribute__((address_space(3))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.sys.p0f32.f32 + // expected-error@+1 {{'__nvvm_sys_st_gen_f' needs target feature sm_70}} + __nvvm_sys_st_gen_f(fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.sys.p1f32.f32 + // expected-error@+1 {{'__nvvm_sys_st_global_f' needs target feature sm_70}} + __nvvm_sys_st_global_f((__attribute__((address_space(1))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.sys.p3f32.f32 + // expected-error@+1 {{'__nvvm_sys_st_shared_f' needs target feature sm_70}} + __nvvm_sys_st_shared_f((__attribute__((address_space(3))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.release.sys.p0f32.f32 + // expected-error@+1 {{'__nvvm_release_sys_st_gen_f' needs target feature sm_70}} + __nvvm_release_sys_st_gen_f(fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.release.sys.p1f32.f32 + // expected-error@+1 {{'__nvvm_release_sys_st_global_f' needs target feature sm_70}} + __nvvm_release_sys_st_global_f((__attribute__((address_space(1))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.release.sys.p3f32.f32 + // expected-error@+1 {{'__nvvm_release_sys_st_shared_f' needs target feature sm_70}} + __nvvm_release_sys_st_shared_f((__attribute__((address_space(3))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.cta.p0f32.f32 + // expected-error@+1 {{'__nvvm_cta_st_gen_f' needs target feature sm_70}} + __nvvm_cta_st_gen_f(fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.cta.p1f32.f32 + // expected-error@+1 {{'__nvvm_cta_st_global_f' needs target feature sm_70}} + __nvvm_cta_st_global_f((__attribute__((address_space(1))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.cta.p3f32.f32 + // expected-error@+1 {{'__nvvm_cta_st_shared_f' needs target feature sm_70}} + __nvvm_cta_st_shared_f((__attribute__((address_space(3))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.release.cta.p0f32.f32 + // expected-error@+1 {{'__nvvm_release_cta_st_gen_f' needs target feature sm_70}} + __nvvm_release_cta_st_gen_f(fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.release.cta.p1f32.f32 + // expected-error@+1 {{'__nvvm_release_cta_st_global_f' needs target feature sm_70}} + __nvvm_release_cta_st_global_f((__attribute__((address_space(1))) float *)fp, f); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.release.cta.p3f32.f32 + // expected-error@+1 {{'__nvvm_release_cta_st_shared_f' needs target feature sm_70}} + __nvvm_release_cta_st_shared_f((__attribute__((address_space(3))) float *)fp, f); + + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.gen.f.f64.p0f64 + // expected-error@+1 {{'__nvvm_ld_gen_d' needs target feature sm_70}} + __nvvm_ld_gen_d(dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.global.f.f64.p1f64 + // expected-error@+1 {{'__nvvm_ld_global_d' needs target feature sm_70}} + __nvvm_ld_global_d((__attribute__((address_space(1))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.shared.f.f64.p3f64 + // expected-error@+1 {{'__nvvm_ld_shared_d' needs target feature sm_70}} + __nvvm_ld_shared_d((__attribute__((address_space(3))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.gen.f.acquire.f64.p0f64 + // expected-error@+1 {{'__nvvm_acquire_ld_gen_d' needs target feature sm_70}} + __nvvm_acquire_ld_gen_d(dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.global.f.acquire.f64.p1f64 + // expected-error@+1 {{'__nvvm_acquire_ld_global_d' needs target feature sm_70}} + __nvvm_acquire_ld_global_d((__attribute__((address_space(1))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.shared.f.acquire.f64.p3f64 + // expected-error@+1 {{'__nvvm_acquire_ld_shared_d' needs target feature sm_70}} + __nvvm_acquire_ld_shared_d((__attribute__((address_space(3))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.gen.f.sys.f64.p0f64 + // expected-error@+1 {{'__nvvm_sys_ld_gen_d' needs target feature sm_70}} + __nvvm_sys_ld_gen_d(dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.global.f.sys.f64.p1f64 + // expected-error@+1 {{'__nvvm_sys_ld_global_d' needs target feature sm_70}} + __nvvm_sys_ld_global_d((__attribute__((address_space(1))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.shared.f.sys.f64.p3f64 + // expected-error@+1 {{'__nvvm_sys_ld_shared_d' needs target feature sm_70}} + __nvvm_sys_ld_shared_d((__attribute__((address_space(3))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.gen.f.acquire.sys.f64.p0f64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_gen_d' needs target feature sm_70}} + __nvvm_acquire_sys_ld_gen_d(dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.global.f.acquire.sys.f64.p1f64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_global_d' needs target feature sm_70}} + __nvvm_acquire_sys_ld_global_d((__attribute__((address_space(1))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.shared.f.acquire.sys.f64.p3f64 + // expected-error@+1 {{'__nvvm_acquire_sys_ld_shared_d' needs target feature sm_70}} + __nvvm_acquire_sys_ld_shared_d((__attribute__((address_space(3))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.gen.f.cta.f64.p0f64 + // expected-error@+1 {{'__nvvm_cta_ld_gen_d' needs target feature sm_70}} + __nvvm_cta_ld_gen_d(dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.global.f.cta.f64.p1f64 + // expected-error@+1 {{'__nvvm_cta_ld_global_d' needs target feature sm_70}} + __nvvm_cta_ld_global_d((__attribute__((address_space(1))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.shared.f.cta.f64.p3f64 + // expected-error@+1 {{'__nvvm_cta_ld_shared_d' needs target feature sm_70}} + __nvvm_cta_ld_shared_d((__attribute__((address_space(3))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.gen.f.acquire.cta.f64.p0f64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_gen_d' needs target feature sm_70}} + __nvvm_acquire_cta_ld_gen_d(dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.global.f.acquire.cta.f64.p1f64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_global_d' needs target feature sm_70}} + __nvvm_acquire_cta_ld_global_d((__attribute__((address_space(1))) double *)dfp); + // CHECK_SM70_LP64: call double @llvm.nvvm.ld.shared.f.acquire.cta.f64.p3f64 + // expected-error@+1 {{'__nvvm_acquire_cta_ld_shared_d' needs target feature sm_70}} + __nvvm_acquire_cta_ld_shared_d((__attribute__((address_space(3))) double *)dfp); + + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.p0f64.f64 + // expected-error@+1 {{'__nvvm_st_gen_d' needs target feature sm_70}} + __nvvm_st_gen_d(dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.p1f64.f64 + // expected-error@+1 {{'__nvvm_st_global_d' needs target feature sm_70}} + __nvvm_st_global_d((__attribute__((address_space(1))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.p3f64.f64 + // expected-error@+1 {{'__nvvm_st_shared_d' needs target feature sm_70}} + __nvvm_st_shared_d((__attribute__((address_space(3))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.release.p0f64.f64 + // expected-error@+1 {{'__nvvm_release_st_gen_d' needs target feature sm_70}} + __nvvm_release_st_gen_d(dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.release.p1f64.f64 + // expected-error@+1 {{'__nvvm_release_st_global_d' needs target feature sm_70}} + __nvvm_release_st_global_d((__attribute__((address_space(1))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.release.p3f64.f64 + // expected-error@+1 {{'__nvvm_release_st_shared_d' needs target feature sm_70}} + __nvvm_release_st_shared_d((__attribute__((address_space(3))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.sys.p0f64.f64 + // expected-error@+1 {{'__nvvm_sys_st_gen_d' needs target feature sm_70}} + __nvvm_sys_st_gen_d(dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.sys.p1f64.f64 + // expected-error@+1 {{'__nvvm_sys_st_global_d' needs target feature sm_70}} + __nvvm_sys_st_global_d((__attribute__((address_space(1))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.sys.p3f64.f64 + // expected-error@+1 {{'__nvvm_sys_st_shared_d' needs target feature sm_70}} + __nvvm_sys_st_shared_d((__attribute__((address_space(3))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.release.sys.p0f64.f64 + // expected-error@+1 {{'__nvvm_release_sys_st_gen_d' needs target feature sm_70}} + __nvvm_release_sys_st_gen_d(dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.release.sys.p1f64.f64 + // expected-error@+1 {{'__nvvm_release_sys_st_global_d' needs target feature sm_70}} + __nvvm_release_sys_st_global_d((__attribute__((address_space(1))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.release.sys.p3f64.f64 + // expected-error@+1 {{'__nvvm_release_sys_st_shared_d' needs target feature sm_70}} + __nvvm_release_sys_st_shared_d((__attribute__((address_space(3))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.cta.p0f64.f64 + // expected-error@+1 {{'__nvvm_cta_st_gen_d' needs target feature sm_70}} + __nvvm_cta_st_gen_d(dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.cta.p1f64.f64 + // expected-error@+1 {{'__nvvm_cta_st_global_d' needs target feature sm_70}} + __nvvm_cta_st_global_d((__attribute__((address_space(1))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.cta.p3f64.f64 + // expected-error@+1 {{'__nvvm_cta_st_shared_d' needs target feature sm_70}} + __nvvm_cta_st_shared_d((__attribute__((address_space(3))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.gen.f.release.cta.p0f64.f64 + // expected-error@+1 {{'__nvvm_release_cta_st_gen_d' needs target feature sm_70}} + __nvvm_release_cta_st_gen_d(dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.global.f.release.cta.p1f64.f64 + // expected-error@+1 {{'__nvvm_release_cta_st_global_d' needs target feature sm_70}} + __nvvm_release_cta_st_global_d((__attribute__((address_space(1))) double *)dfp, df); + // CHECK_SM70_LP64: call void @llvm.nvvm.st.shared.f.release.cta.p3f64.f64 + // expected-error@+1 {{'__nvvm_release_cta_st_shared_d' needs target feature sm_70}} + __nvvm_release_cta_st_shared_d((__attribute__((address_space(3))) double *)dfp, df); + // CHECK_SM70_LP64: call i32 @llvm.nvvm.atomic.add.gen.i.acquire.i32.p0i32 // expected-error@+1 {{'__nvvm_atom_acquire_add_gen_i' needs target feature sm_70}} __nvvm_atom_acquire_add_gen_i(ip, i); diff --git a/libclc/ptx-nvidiacl/libspirv/SOURCES b/libclc/ptx-nvidiacl/libspirv/SOURCES index 969d7d28c2b7b..31faf012de6fa 100644 --- a/libclc/ptx-nvidiacl/libspirv/SOURCES +++ b/libclc/ptx-nvidiacl/libspirv/SOURCES @@ -98,3 +98,5 @@ atomic/atomic_min.cl atomic/atomic_xchg.cl atomic/atomic_or.cl atomic/atomic_xor.cl +atomic/atomic_load.cl +atomic/atomic_store.cl diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl new file mode 100644 index 0000000000000..cf1bb1dbbd82b --- /dev/null +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl @@ -0,0 +1,84 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +extern int __clc_nvvm_reflect_arch(); + +#define __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ + switch (scope) { \ + case Subgroup: \ + case Workgroup: { \ + TYPE_NV res = __nvvm##ORDER##_cta_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer); \ + return *(TYPE *)&res; \ + } \ + case Device: { \ + TYPE_NV res = __nvvm##ORDER##_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer); \ + return *(TYPE *)&res; \ + } \ + case CrossDevice: \ + default: { \ + TYPE_NV res = __nvvm##ORDER##_sys_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer); \ + return *(TYPE *)&res; \ + } \ + } + +#define __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, ADDR_SPACE, \ + ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ + _CLC_DECL TYPE \ + _Z18__spirv_AtomicLoadPU3##ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ + const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Acquire: \ + __CLC_NVVM_ATOMIC_LOAD_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, _acquire) \ + } \ + } else { \ + if (order == None) { \ + TYPE_NV res = __nvvm_volatile_ld##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer); \ + return *(TYPE *)&res; \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } + +#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __global, AS1, _global_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __local, AS3, _shared_) + +__CLC_NVVM_ATOMIC_LOAD(int, i, int, i) +__CLC_NVVM_ATOMIC_LOAD(uint, j, int, i) +__CLC_NVVM_ATOMIC_LOAD(long, l, long, l) +__CLC_NVVM_ATOMIC_LOAD(ulong, m, long, l) + +__CLC_NVVM_ATOMIC_LOAD(float, f, float, f) +#ifdef cl_khr_int64_base_atomics +__CLC_NVVM_ATOMIC_LOAD(double, d, double, d) +#endif + +#undef __CLC_NVVM_ATOMIC_LOAD_TYPES +#undef __CLC_NVVM_ATOMIC_LOAD +#undef __CLC_NVVM_ATOMIC_LOAD_IMPL diff --git a/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl new file mode 100644 index 0000000000000..9c57e5b1d7554 --- /dev/null +++ b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_store.cl @@ -0,0 +1,85 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +extern int __clc_nvvm_reflect_arch(); + +#define __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ORDER) \ + switch (scope) { \ + case Subgroup: \ + case Workgroup: { \ + __nvvm##ORDER##_cta_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ + return; \ + } \ + case Device: { \ + __nvvm##ORDER##_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ + return; \ + } \ + case CrossDevice: \ + default: { \ + __nvvm##ORDER##_sys_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ + return; \ + } \ + } + +#define __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ + TYPE_MANGLED_NV, ADDR_SPACE, \ + ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ + _CLC_DECL void \ + _Z19__spirv_AtomicStorePU3##ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ + volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ + enum MemorySemanticsMask semantics, TYPE value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, ) \ + case Release: \ + __CLC_NVVM_ATOMIC_STORE_IMPL_ORDER(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV, \ + _release) \ + } \ + } else { \ + if (order == None) { \ + __nvvm_volatile_st##ADDR_SPACE_NV##TYPE_MANGLED_NV( \ + (ADDR_SPACE TYPE_NV *)pointer, *(TYPE_NV *)&value); \ + return; \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } + +#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __global, AS1, _global_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ + __local, AS3, _shared_) + +__CLC_NVVM_ATOMIC_STORE(int, i, int, i) +__CLC_NVVM_ATOMIC_STORE(uint, j, int, i) +__CLC_NVVM_ATOMIC_STORE(long, l, long, l) +__CLC_NVVM_ATOMIC_STORE(ulong, m, long, l) + +__CLC_NVVM_ATOMIC_STORE(float, f, float, f) +#ifdef cl_khr_int64_base_atomics +__CLC_NVVM_ATOMIC_STORE(double, d, double, d) +#endif + +#undef __CLC_NVVM_ATOMIC_STORE_TYPES +#undef __CLC_NVVM_ATOMIC_STORE +#undef __CLC_NVVM_ATOMIC_STORE_IMPL diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index fb7598d6ac75d..d13d3ad7c7d9a 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1213,6 +1213,14 @@ let TargetPrefix = "nvvm" in { [LLVMAnyPointerType, llvm_i32_ty], [IntrArgMemOnly, NoCapture>]>; + class SCOPED_ATOMIC_ld_impl + : Intrinsic<[elty], + [LLVMAnyPointerType>], + [IntrArgMemOnly, NoCapture>]>; + class SCOPED_ATOMIC_st_impl + : Intrinsic<[], + [LLVMAnyPointerType>, elty], + [IntrArgMemOnly, NoCapture>]>; class SCOPED_ATOMIC2_impl : Intrinsic<[elty], [LLVMAnyPointerType>, LLVMMatchType<0>], @@ -1223,6 +1231,16 @@ let TargetPrefix = "nvvm" in { LLVMMatchType<0>], [IntrArgMemOnly, NoCapture>]>; + multiclass PTXAtomicWithScope_ld { + def "" : SCOPED_ATOMIC_ld_impl; + def _cta : SCOPED_ATOMIC_ld_impl; + def _sys : SCOPED_ATOMIC_ld_impl; + } + multiclass PTXAtomicWithScope_st { + def "" : SCOPED_ATOMIC_st_impl; + def _cta : SCOPED_ATOMIC_st_impl; + def _sys : SCOPED_ATOMIC_st_impl; + } multiclass PTXAtomicWithScope2 { def "" : SCOPED_ATOMIC2_impl; def _cta : SCOPED_ATOMIC2_impl; @@ -1233,6 +1251,16 @@ let TargetPrefix = "nvvm" in { def _cta : SCOPED_ATOMIC3_impl; def _sys : SCOPED_ATOMIC3_impl; } + multiclass PTXLdWithScope_sem { + defm "": PTXAtomicWithScope_ld; + defm _acquire: PTXAtomicWithScope_ld; + def _volatile: SCOPED_ATOMIC_ld_impl; + } + multiclass PTXStWithScope_sem { + defm "": PTXAtomicWithScope_st; + defm _release: PTXAtomicWithScope_st; + def _volatile: SCOPED_ATOMIC_st_impl; + } multiclass PTXAtomicWithScope2_sem { defm "": PTXAtomicWithScope2; defm _acquire: PTXAtomicWithScope2; @@ -1245,10 +1273,20 @@ let TargetPrefix = "nvvm" in { defm _release: PTXAtomicWithScope3; defm _acq_rel: PTXAtomicWithScope3; } + multiclass PTXLdWithScope_fi { + defm _f: PTXLdWithScope_sem; + defm _i: PTXLdWithScope_sem; + } + multiclass PTXStWithScope_fi { + defm _f: PTXStWithScope_sem; + defm _i: PTXStWithScope_sem; + } multiclass PTXAtomicWithScope2_fi { defm _f: PTXAtomicWithScope2_sem; defm _i: PTXAtomicWithScope2_sem; } + defm int_nvvm_ld_gen : PTXLdWithScope_fi; + defm int_nvvm_st_gen : PTXStWithScope_fi; defm int_nvvm_atomic_add_gen : PTXAtomicWithScope2_fi; defm int_nvvm_atomic_inc_gen_i : PTXAtomicWithScope2_sem; defm int_nvvm_atomic_dec_gen_i : PTXAtomicWithScope2_sem; @@ -1262,6 +1300,8 @@ let TargetPrefix = "nvvm" in { defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2_sem; defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3_sem; + defm int_nvvm_ld_shared : PTXLdWithScope_fi; + defm int_nvvm_st_shared : PTXStWithScope_fi; defm int_nvvm_atomic_add_shared : PTXAtomicWithScope2_fi; defm int_nvvm_atomic_inc_shared_i : PTXAtomicWithScope2_sem; defm int_nvvm_atomic_dec_shared_i : PTXAtomicWithScope2_sem; @@ -1275,6 +1315,8 @@ let TargetPrefix = "nvvm" in { defm int_nvvm_atomic_and_shared_i : PTXAtomicWithScope2_sem; defm int_nvvm_atomic_cas_shared_i : PTXAtomicWithScope3_sem; + defm int_nvvm_ld_global : PTXLdWithScope_fi; + defm int_nvvm_st_global : PTXStWithScope_fi; defm int_nvvm_atomic_add_global : PTXAtomicWithScope2_fi; defm int_nvvm_atomic_inc_global_i : PTXAtomicWithScope2_sem; defm int_nvvm_atomic_dec_global_i : PTXAtomicWithScope2_sem; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index d7c8acd146082..94289f0b58016 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1698,7 +1698,44 @@ class ATOM23_impl Preds, Requires; // Define instruction variants for all addressing modes. -multiclass ATOM2P_impl Preds> { + let AddedComplexity = 1 in { + def : ATOM23_impl; + def : ATOM23_impl; + } +} + +multiclass ATOM_StP_impl Preds> { + let AddedComplexity = 1 in { + def : NVPTXInst<(outs), (ins Int32Regs:$src, regclass:$b), + AsmStr, + [(Intr Int32Regs:$src, regclass:$b)]>, + Requires; + def : NVPTXInst<(outs), (ins Int64Regs:$src, regclass:$b), + AsmStr, + [(Intr Int64Regs:$src, regclass:$b)]>, + Requires; + } + def : NVPTXInst<(outs), (ins Int32Regs:$src, ImmType:$b), + AsmStr, + [(Intr Int32Regs:$src, (ImmTy Imm:$b))]>, + Requires; + def : NVPTXInst<(outs), (ins Int64Regs:$src, ImmType:$b), + AsmStr, + [(Intr Int64Regs:$src, (ImmTy Imm:$b))]>, + Requires; +} + +multiclass ATOM2P_impl Preds> { @@ -1757,6 +1794,35 @@ multiclass ATOM3P_impl Preds> { + defm : ATOM_LdP_impl<"ld." # SemStr + # !if(!eq(SemStr, "volatile"), "", "." # ScopeStr) + # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr) + # "." # TypeStr + # " \t$result, [$src];", + !cast( + "int_nvvm_ld_" # SpaceStr # "_" # IntTypeStr + # !if(!eq(SemStr, "relaxed"), "", "_" # SemStr) + # !if(!or(!eq(ScopeStr, "gpu"), !eq(ScopeStr, "")), "", "_" # ScopeStr)), + regclass, Preds>; +} +multiclass ATOM_StN_impl Preds> { + defm : ATOM_StP_impl<"st." # SemStr + # !if(!eq(SemStr, "volatile"), "", "." # ScopeStr) + # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr) + # "." # TypeStr + # " \t[$src], $b;", + !cast( + "int_nvvm_st_" # SpaceStr # "_" # IntTypeStr + # !if(!eq(SemStr, "relaxed"), "", "_" # SemStr) + # !if(!or(!eq(ScopeStr, "gpu"), !eq(ScopeStr, "")), "", "_" # ScopeStr)), + regclass, ImmType, Imm, ImmTy, Preds>; +} multiclass ATOM2N_impl Preds> { + defm _gen : ATOM_LdN_impl; + defm _global : ATOM_LdN_impl; + defm _shared : ATOM_LdN_impl; +} +multiclass ATOM_StN_spaces_impl Preds> { + defm _gen : ATOM_StN_impl; + defm _global : ATOM_StN_impl; + defm _shared : ATOM_StN_impl; +} multiclass ATOM2N_spaces_impl; } -// Constructs variants for different address spaces. +// Constructs variants for different semantic orders. +multiclass ATOM_LdA_impl Preds> { + defm _relaxed_ : ATOM_LdN_spaces_impl; + defm _acquire_ : ATOM_LdN_spaces_impl; +} +multiclass ATOM_StA_impl Preds> { + defm _relaxed_ : ATOM_StN_spaces_impl; + defm _release_ : ATOM_StN_spaces_impl; +} multiclass ATOM2A_impl Preds> { @@ -1841,6 +1944,32 @@ multiclass ATOM3A_impl Preds> { + defm _volatile_ : ATOM_LdN_spaces_impl; + defm "" : ATOM_LdA_impl; + defm _cta : ATOM_LdA_impl; + defm _sys : ATOM_LdA_impl; +} +multiclass ATOM_StS_impl Preds> { + defm _volatile_ : ATOM_StN_spaces_impl; + defm "" : ATOM_StA_impl; + defm _cta : ATOM_StA_impl; + defm _sys : ATOM_StA_impl; +} multiclass ATOM2S_impl Preds> { @@ -1868,6 +1997,22 @@ multiclass ATOM3S_impl; } +// ld +multiclass ATOM_ld_impl { + defm _s32 : ATOM_LdS_impl<"i", "s32", Int32Regs, []>; + defm _u64 : ATOM_LdS_impl<"i", "s64", Int64Regs, []>; + defm _f32 : ATOM_LdS_impl<"f", "f32", Float32Regs, []>; + defm _f64 : ATOM_LdS_impl<"f", "f64", Float64Regs, []>; +} +// st +multiclass ATOM_st_impl { + defm _s32 : ATOM_StS_impl<"i", "s32", Int32Regs, i32imm, imm, i32, []>; + defm _u64 : ATOM_StS_impl<"i", "s64", Int64Regs, i64imm, imm, i64, []>; + defm _f32 : ATOM_StS_impl<"f", "f32", Float32Regs, f32imm, fpimm, f32, + []>; + defm _f64 : ATOM_StS_impl<"f", "f64", Float64Regs, f64imm, fpimm, f64, + []>; +} // atom.add multiclass ATOM2_add_impl { defm _s32 : ATOM2S_impl; @@ -1914,6 +2059,8 @@ multiclass ATOM3_cas_impl { defm _b64 : ATOM3S_impl; } +defm INT_PTX_LD : ATOM_ld_impl; +defm INT_PTX_ST : ATOM_st_impl; defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">; defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">; defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">; diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-semantics.ll b/llvm/test/CodeGen/NVPTX/atomics-with-semantics.ll index a11b9a4a6c3d5..2f565b71db79d 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-with-semantics.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-with-semantics.ll @@ -2126,6 +2126,509 @@ entry: ; CHECK: atom.acq_rel.cta.shared.cas.b64 %tmp701 = tail call i64 @llvm.nvvm.atomic.cas.shared.i.acq.rel.cta.i64.p3i64(i64 addrspace(3)* %llp3, i64 %ll, i64 %ll); + ; CHECK: ld.relaxed.gpu.s32 + %tmpldst0 = tail call i32 @llvm.nvvm.ld.gen.i.i32.p0(i32* %ip); + + ; CHECK: ld.relaxed.gpu.global.s32 + %tmpldst1 = tail call i32 @llvm.nvvm.ld.global.i.i32.p1(i32 addrspace(1)* %ip1); + + ; CHECK: ld.relaxed.gpu.shared.s32 + %tmpldst2 = tail call i32 @llvm.nvvm.ld.shared.i.i32.p3(i32 addrspace(3)* %ip3); + + ; CHECK: ld.acquire.gpu.s32 + %tmpldst3 = tail call i32 @llvm.nvvm.ld.gen.i.acquire.i32.p0(i32* %ip); + + ; CHECK: ld.acquire.gpu.global.s32 + %tmpldst4 = tail call i32 @llvm.nvvm.ld.global.i.acquire.i32.p1(i32 addrspace(1)* %ip1); + + ; CHECK: ld.acquire.gpu.shared.s32 + %tmpldst5 = tail call i32 @llvm.nvvm.ld.shared.i.acquire.i32.p3(i32 addrspace(3)* %ip3); + + ; CHECK: ld.relaxed.sys.s32 + %tmpldst6 = tail call i32 @llvm.nvvm.ld.gen.i.sys.i32.p0(i32* %ip); + + ; CHECK: ld.relaxed.sys.global.s32 + %tmpldst7 = tail call i32 @llvm.nvvm.ld.global.i.sys.i32.p1(i32 addrspace(1)* %ip1); + + ; CHECK: ld.relaxed.sys.shared.s32 + %tmpldst8 = tail call i32 @llvm.nvvm.ld.shared.i.sys.i32.p3(i32 addrspace(3)* %ip3); + + ; CHECK: ld.acquire.sys.s32 + %tmpldst9 = tail call i32 @llvm.nvvm.ld.gen.i.acquire.sys.i32.p0(i32* %ip); + + ; CHECK: ld.acquire.sys.global.s32 + %tmpldst10 = tail call i32 @llvm.nvvm.ld.global.i.acquire.sys.i32.p1(i32 addrspace(1)* %ip1); + + ; CHECK: ld.acquire.sys.shared.s32 + %tmpldst11 = tail call i32 @llvm.nvvm.ld.shared.i.acquire.sys.i32.p3(i32 addrspace(3)* %ip3); + + ; CHECK: ld.relaxed.cta.s32 + %tmpldst12 = tail call i32 @llvm.nvvm.ld.gen.i.cta.i32.p0(i32* %ip); + + ; CHECK: ld.relaxed.cta.global.s32 + %tmpldst13 = tail call i32 @llvm.nvvm.ld.global.i.cta.i32.p1(i32 addrspace(1)* %ip1); + + ; CHECK: ld.relaxed.cta.shared.s32 + %tmpldst14 = tail call i32 @llvm.nvvm.ld.shared.i.cta.i32.p3(i32 addrspace(3)* %ip3); + + ; CHECK: ld.acquire.cta.s32 + %tmpldst15 = tail call i32 @llvm.nvvm.ld.gen.i.acquire.cta.i32.p0(i32* %ip); + + ; CHECK: ld.acquire.cta.global.s32 + %tmpldst16 = tail call i32 @llvm.nvvm.ld.global.i.acquire.cta.i32.p1(i32 addrspace(1)* %ip1); + + ; CHECK: ld.acquire.cta.shared.s32 + %tmpldst17 = tail call i32 @llvm.nvvm.ld.shared.i.acquire.cta.i32.p3(i32 addrspace(3)* %ip3); + + ; CHECK: ld.relaxed.gpu.s64 + %tmpldst18 = tail call i64 @llvm.nvvm.ld.gen.i.i64.p0(i64* %llp); + + ; CHECK: ld.relaxed.gpu.global.s64 + %tmpldst19 = tail call i64 @llvm.nvvm.ld.global.i.i64.p1(i64 addrspace(1)* %llp1); + + ; CHECK: ld.relaxed.gpu.shared.s64 + %tmpldst20 = tail call i64 @llvm.nvvm.ld.shared.i.i64.p3(i64 addrspace(3)* %llp3); + + ; CHECK: ld.acquire.gpu.s64 + %tmpldst21 = tail call i64 @llvm.nvvm.ld.gen.i.acquire.i64.p0(i64* %llp); + + ; CHECK: ld.acquire.gpu.global.s64 + %tmpldst22 = tail call i64 @llvm.nvvm.ld.global.i.acquire.i64.p1(i64 addrspace(1)* %llp1); + + ; CHECK: ld.acquire.gpu.shared.s64 + %tmpldst23 = tail call i64 @llvm.nvvm.ld.shared.i.acquire.i64.p3(i64 addrspace(3)* %llp3); + + ; CHECK: ld.relaxed.sys.s64 + %tmpldst24 = tail call i64 @llvm.nvvm.ld.gen.i.sys.i64.p0(i64* %llp); + + ; CHECK: ld.relaxed.sys.global.s64 + %tmpldst25 = tail call i64 @llvm.nvvm.ld.global.i.sys.i64.p1(i64 addrspace(1)* %llp1); + + ; CHECK: ld.relaxed.sys.shared.s64 + %tmpldst26 = tail call i64 @llvm.nvvm.ld.shared.i.sys.i64.p3(i64 addrspace(3)* %llp3); + + ; CHECK: ld.acquire.sys.s64 + %tmpldst27 = tail call i64 @llvm.nvvm.ld.gen.i.acquire.sys.i64.p0(i64* %llp); + + ; CHECK: ld.acquire.sys.global.s64 + %tmpldst28 = tail call i64 @llvm.nvvm.ld.global.i.acquire.sys.i64.p1(i64 addrspace(1)* %llp1); + + ; CHECK: ld.acquire.sys.shared.s64 + %tmpldst29 = tail call i64 @llvm.nvvm.ld.shared.i.acquire.sys.i64.p3(i64 addrspace(3)* %llp3); + + ; CHECK: ld.relaxed.cta.s64 + %tmpldst30 = tail call i64 @llvm.nvvm.ld.gen.i.cta.i64.p0(i64* %llp); + + ; CHECK: ld.relaxed.cta.global.s64 + %tmpldst31 = tail call i64 @llvm.nvvm.ld.global.i.cta.i64.p1(i64 addrspace(1)* %llp1); + + ; CHECK: ld.relaxed.cta.shared.s64 + %tmpldst32 = tail call i64 @llvm.nvvm.ld.shared.i.cta.i64.p3(i64 addrspace(3)* %llp3); + + ; CHECK: ld.acquire.cta.s64 + %tmpldst33 = tail call i64 @llvm.nvvm.ld.gen.i.acquire.cta.i64.p0(i64* %llp); + + ; CHECK: ld.acquire.cta.global.s64 + %tmpldst34 = tail call i64 @llvm.nvvm.ld.global.i.acquire.cta.i64.p1(i64 addrspace(1)* %llp1); + + ; CHECK: ld.acquire.cta.shared.s64 + %tmpldst35 = tail call i64 @llvm.nvvm.ld.shared.i.acquire.cta.i64.p3(i64 addrspace(3)* %llp3); + + ; CHECK: ld.relaxed.gpu.f32 + %tmpldst36 = tail call float @llvm.nvvm.ld.gen.f.f32.p0(float* %fp); + + ; CHECK: ld.relaxed.gpu.global.f32 + %tmpldst37 = tail call float @llvm.nvvm.ld.global.f.f32.p1(float addrspace(1)* %fp1); + + ; CHECK: ld.relaxed.gpu.shared.f32 + %tmpldst38 = tail call float @llvm.nvvm.ld.shared.f.f32.p3(float addrspace(3)* %fp3); + + ; CHECK: ld.acquire.gpu.f32 + %tmpldst39 = tail call float @llvm.nvvm.ld.gen.f.acquire.f32.p0(float* %fp); + + ; CHECK: ld.acquire.gpu.global.f32 + %tmpldst40 = tail call float @llvm.nvvm.ld.global.f.acquire.f32.p1(float addrspace(1)* %fp1); + + ; CHECK: ld.acquire.gpu.shared.f32 + %tmpldst41 = tail call float @llvm.nvvm.ld.shared.f.acquire.f32.p3(float addrspace(3)* %fp3); + + ; CHECK: ld.relaxed.sys.f32 + %tmpldst42 = tail call float @llvm.nvvm.ld.gen.f.sys.f32.p0(float* %fp); + + ; CHECK: ld.relaxed.sys.global.f32 + %tmpldst43 = tail call float @llvm.nvvm.ld.global.f.sys.f32.p1(float addrspace(1)* %fp1); + + ; CHECK: ld.relaxed.sys.shared.f32 + %tmpldst44 = tail call float @llvm.nvvm.ld.shared.f.sys.f32.p3(float addrspace(3)* %fp3); + + ; CHECK: ld.acquire.sys.f32 + %tmpldst45 = tail call float @llvm.nvvm.ld.gen.f.acquire.sys.f32.p0(float* %fp); + + ; CHECK: ld.acquire.sys.global.f32 + %tmpldst46 = tail call float @llvm.nvvm.ld.global.f.acquire.sys.f32.p1(float addrspace(1)* %fp1); + + ; CHECK: ld.acquire.sys.shared.f32 + %tmpldst47 = tail call float @llvm.nvvm.ld.shared.f.acquire.sys.f32.p3(float addrspace(3)* %fp3); + + ; CHECK: ld.relaxed.cta.f32 + %tmpldst48 = tail call float @llvm.nvvm.ld.gen.f.cta.f32.p0(float* %fp); + + ; CHECK: ld.relaxed.cta.global.f32 + %tmpldst49 = tail call float @llvm.nvvm.ld.global.f.cta.f32.p1(float addrspace(1)* %fp1); + + ; CHECK: ld.relaxed.cta.shared.f32 + %tmpldst50 = tail call float @llvm.nvvm.ld.shared.f.cta.f32.p3(float addrspace(3)* %fp3); + + ; CHECK: ld.acquire.cta.f32 + %tmpldst51 = tail call float @llvm.nvvm.ld.gen.f.acquire.cta.f32.p0(float* %fp); + + ; CHECK: ld.acquire.cta.global.f32 + %tmpldst52 = tail call float @llvm.nvvm.ld.global.f.acquire.cta.f32.p1(float addrspace(1)* %fp1); + + ; CHECK: ld.acquire.cta.shared.f32 + %tmpldst53 = tail call float @llvm.nvvm.ld.shared.f.acquire.cta.f32.p3(float addrspace(3)* %fp3); + + ; CHECK: ld.relaxed.gpu.f64 + %tmpldst54 = tail call double @llvm.nvvm.ld.gen.f.f64.p0(double* %dfp); + + ; CHECK: ld.relaxed.gpu.global.f64 + %tmpldst55 = tail call double @llvm.nvvm.ld.global.f.f64.p1(double addrspace(1)* %dfp1); + + ; CHECK: ld.relaxed.gpu.shared.f64 + %tmpldst56 = tail call double @llvm.nvvm.ld.shared.f.f64.p3(double addrspace(3)* %dfp3); + + ; CHECK: ld.acquire.gpu.f64 + %tmpldst57 = tail call double @llvm.nvvm.ld.gen.f.acquire.f64.p0(double* %dfp); + + ; CHECK: ld.acquire.gpu.global.f64 + %tmpldst58 = tail call double @llvm.nvvm.ld.global.f.acquire.f64.p1(double addrspace(1)* %dfp1); + + ; CHECK: ld.acquire.gpu.shared.f64 + %tmpldst59 = tail call double @llvm.nvvm.ld.shared.f.acquire.f64.p3(double addrspace(3)* %dfp3); + + ; CHECK: ld.relaxed.sys.f64 + %tmpldst60 = tail call double @llvm.nvvm.ld.gen.f.sys.f64.p0(double* %dfp); + + ; CHECK: ld.relaxed.sys.global.f64 + %tmpldst61 = tail call double @llvm.nvvm.ld.global.f.sys.f64.p1(double addrspace(1)* %dfp1); + + ; CHECK: ld.relaxed.sys.shared.f64 + %tmpldst62 = tail call double @llvm.nvvm.ld.shared.f.sys.f64.p3(double addrspace(3)* %dfp3); + + ; CHECK: ld.acquire.sys.f64 + %tmpldst63 = tail call double @llvm.nvvm.ld.gen.f.acquire.sys.f64.p0(double* %dfp); + + ; CHECK: ld.acquire.sys.global.f64 + %tmpldst64 = tail call double @llvm.nvvm.ld.global.f.acquire.sys.f64.p1(double addrspace(1)* %dfp1); + + ; CHECK: ld.acquire.sys.shared.f64 + %tmpldst65 = tail call double @llvm.nvvm.ld.shared.f.acquire.sys.f64.p3(double addrspace(3)* %dfp3); + + ; CHECK: ld.relaxed.cta.f64 + %tmpldst66 = tail call double @llvm.nvvm.ld.gen.f.cta.f64.p0(double* %dfp); + + ; CHECK: ld.relaxed.cta.global.f64 + %tmpldst67 = tail call double @llvm.nvvm.ld.global.f.cta.f64.p1(double addrspace(1)* %dfp1); + + ; CHECK: ld.relaxed.cta.shared.f64 + %tmpldst68 = tail call double @llvm.nvvm.ld.shared.f.cta.f64.p3(double addrspace(3)* %dfp3); + + ; CHECK: ld.acquire.cta.f64 + %tmpldst69 = tail call double @llvm.nvvm.ld.gen.f.acquire.cta.f64.p0(double* %dfp); + + ; CHECK: ld.acquire.cta.global.f64 + %tmpldst70 = tail call double @llvm.nvvm.ld.global.f.acquire.cta.f64.p1(double addrspace(1)* %dfp1); + + ; CHECK: ld.acquire.cta.shared.f64 + %tmpldst71 = tail call double @llvm.nvvm.ld.shared.f.acquire.cta.f64.p3(double addrspace(3)* %dfp3); + + ; CHECK: st.relaxed.gpu.s32 + tail call void @llvm.nvvm.st.gen.i.p0i32.i32(i32* %ip, i32 %i); + + ; CHECK: st.relaxed.gpu.global.s32 + tail call void @llvm.nvvm.st.global.i.p1i32.i32(i32 addrspace(1)* %ip1, i32 %i); + + ; CHECK: st.relaxed.gpu.shared.s32 + tail call void @llvm.nvvm.st.shared.i.p3i32.i32(i32 addrspace(3)* %ip3, i32 %i); + + ; CHECK: st.release.gpu.s32 + tail call void @llvm.nvvm.st.gen.i.release.p0i32.i32(i32* %ip, i32 %i); + + ; CHECK: st.release.gpu.global.s32 + tail call void @llvm.nvvm.st.global.i.release.p1i32.i32(i32 addrspace(1)* %ip1, i32 %i); + + ; CHECK: st.release.gpu.shared.s32 + tail call void @llvm.nvvm.st.shared.i.release.p3i32.i32(i32 addrspace(3)* %ip3, i32 %i); + + ; CHECK: st.relaxed.sys.s32 + tail call void @llvm.nvvm.st.gen.i.sys.p0i32.i32(i32* %ip, i32 %i); + + ; CHECK: st.relaxed.sys.global.s32 + tail call void @llvm.nvvm.st.global.i.sys.p1i32.i32(i32 addrspace(1)* %ip1, i32 %i); + + ; CHECK: st.relaxed.sys.shared.s32 + tail call void @llvm.nvvm.st.shared.i.sys.p3i32.i32(i32 addrspace(3)* %ip3, i32 %i); + + ; CHECK: st.release.sys.s32 + tail call void @llvm.nvvm.st.gen.i.release.sys.p0i32.i32(i32* %ip, i32 %i); + + ; CHECK: st.release.sys.global.s32 + tail call void @llvm.nvvm.st.global.i.release.sys.p1i32.i32(i32 addrspace(1)* %ip1, i32 %i); + + ; CHECK: st.release.sys.shared.s32 + tail call void @llvm.nvvm.st.shared.i.release.sys.p3i32.i32(i32 addrspace(3)* %ip3, i32 %i); + + ; CHECK: st.relaxed.cta.s32 + tail call void @llvm.nvvm.st.gen.i.cta.p0i32.i32(i32* %ip, i32 %i); + + ; CHECK: st.relaxed.cta.global.s32 + tail call void @llvm.nvvm.st.global.i.cta.p1i32.i32(i32 addrspace(1)* %ip1, i32 %i); + + ; CHECK: st.relaxed.cta.shared.s32 + tail call void @llvm.nvvm.st.shared.i.cta.p3i32.i32(i32 addrspace(3)* %ip3, i32 %i); + + ; CHECK: st.release.cta.s32 + tail call void @llvm.nvvm.st.gen.i.release.cta.p0i32.i32(i32* %ip, i32 %i); + + ; CHECK: st.release.cta.global.s32 + tail call void @llvm.nvvm.st.global.i.release.cta.p1i32.i32(i32 addrspace(1)* %ip1, i32 %i); + + ; CHECK: st.release.cta.shared.s32 + tail call void @llvm.nvvm.st.shared.i.release.cta.p3i32.i32(i32 addrspace(3)* %ip3, i32 %i); + + ; CHECK: st.relaxed.gpu.s64 + tail call void @llvm.nvvm.st.gen.i.p0i64.i64(i64* %llp, i64 %ll); + + ; CHECK: st.relaxed.gpu.global.s64 + tail call void @llvm.nvvm.st.global.i.p1i64.i64(i64 addrspace(1)* %llp1, i64 %ll); + + ; CHECK: st.relaxed.gpu.shared.s64 + tail call void @llvm.nvvm.st.shared.i.p3i64.i64(i64 addrspace(3)* %llp3, i64 %ll); + + ; CHECK: st.release.gpu.s64 + tail call void @llvm.nvvm.st.gen.i.release.p0i64.i64(i64* %llp, i64 %ll); + + ; CHECK: st.release.gpu.global.s64 + tail call void @llvm.nvvm.st.global.i.release.p1i64.i64(i64 addrspace(1)* %llp1, i64 %ll); + + ; CHECK: st.release.gpu.shared.s64 + tail call void @llvm.nvvm.st.shared.i.release.p3i64.i64(i64 addrspace(3)* %llp3, i64 %ll); + + ; CHECK: st.relaxed.sys.s64 + tail call void @llvm.nvvm.st.gen.i.sys.p0i64.i64(i64* %llp, i64 %ll); + + ; CHECK: st.relaxed.sys.global.s64 + tail call void @llvm.nvvm.st.global.i.sys.p1i64.i64(i64 addrspace(1)* %llp1, i64 %ll); + + ; CHECK: st.relaxed.sys.shared.s64 + tail call void @llvm.nvvm.st.shared.i.sys.p3i64.i64(i64 addrspace(3)* %llp3, i64 %ll); + + ; CHECK: st.release.sys.s64 + tail call void @llvm.nvvm.st.gen.i.release.sys.p0i64.i64(i64* %llp, i64 %ll); + + ; CHECK: st.release.sys.global.s64 + tail call void @llvm.nvvm.st.global.i.release.sys.p1i64.i64(i64 addrspace(1)* %llp1, i64 %ll); + + ; CHECK: st.release.sys.shared.s64 + tail call void @llvm.nvvm.st.shared.i.release.sys.p3i64.i64(i64 addrspace(3)* %llp3, i64 %ll); + + ; CHECK: st.relaxed.cta.s64 + tail call void @llvm.nvvm.st.gen.i.cta.p0i64.i64(i64* %llp, i64 %ll); + + ; CHECK: st.relaxed.cta.global.s64 + tail call void @llvm.nvvm.st.global.i.cta.p1i64.i64(i64 addrspace(1)* %llp1, i64 %ll); + + ; CHECK: st.relaxed.cta.shared.s64 + tail call void @llvm.nvvm.st.shared.i.cta.p3i64.i64(i64 addrspace(3)* %llp3, i64 %ll); + + ; CHECK: st.release.cta.s64 + tail call void @llvm.nvvm.st.gen.i.release.cta.p0i64.i64(i64* %llp, i64 %ll); + + ; CHECK: st.release.cta.global.s64 + tail call void @llvm.nvvm.st.global.i.release.cta.p1i64.i64(i64 addrspace(1)* %llp1, i64 %ll); + + ; CHECK: st.release.cta.shared.s64 + tail call void @llvm.nvvm.st.shared.i.release.cta.p3i64.i64(i64 addrspace(3)* %llp3, i64 %ll); + + ; CHECK: st.relaxed.gpu.f32 + tail call void @llvm.nvvm.st.gen.f.p0f32.f32(float* %fp, float %f); + + ; CHECK: st.relaxed.gpu.global.f32 + tail call void @llvm.nvvm.st.global.f.p1f32.f32(float addrspace(1)* %fp1, float %f); + + ; CHECK: st.relaxed.gpu.shared.f32 + tail call void @llvm.nvvm.st.shared.f.p3f32.f32(float addrspace(3)* %fp3, float %f); + + ; CHECK: st.release.gpu.f32 + tail call void @llvm.nvvm.st.gen.f.release.p0f32.f32(float* %fp, float %f); + + ; CHECK: st.release.gpu.global.f32 + tail call void @llvm.nvvm.st.global.f.release.p1f32.f32(float addrspace(1)* %fp1, float %f); + + ; CHECK: st.release.gpu.shared.f32 + tail call void @llvm.nvvm.st.shared.f.release.p3f32.f32(float addrspace(3)* %fp3, float %f); + + ; CHECK: st.relaxed.sys.f32 + tail call void @llvm.nvvm.st.gen.f.sys.p0f32.f32(float* %fp, float %f); + + ; CHECK: st.relaxed.sys.global.f32 + tail call void @llvm.nvvm.st.global.f.sys.p1f32.f32(float addrspace(1)* %fp1, float %f); + + ; CHECK: st.relaxed.sys.shared.f32 + tail call void @llvm.nvvm.st.shared.f.sys.p3f32.f32(float addrspace(3)* %fp3, float %f); + + ; CHECK: st.release.sys.f32 + tail call void @llvm.nvvm.st.gen.f.release.sys.p0f32.f32(float* %fp, float %f); + + ; CHECK: st.release.sys.global.f32 + tail call void @llvm.nvvm.st.global.f.release.sys.p1f32.f32(float addrspace(1)* %fp1, float %f); + + ; CHECK: st.release.sys.shared.f32 + tail call void @llvm.nvvm.st.shared.f.release.sys.p3f32.f32(float addrspace(3)* %fp3, float %f); + + ; CHECK: st.relaxed.cta.f32 + tail call void @llvm.nvvm.st.gen.f.cta.p0f32.f32(float* %fp, float %f); + + ; CHECK: st.relaxed.cta.global.f32 + tail call void @llvm.nvvm.st.global.f.cta.p1f32.f32(float addrspace(1)* %fp1, float %f); + + ; CHECK: st.relaxed.cta.shared.f32 + tail call void @llvm.nvvm.st.shared.f.cta.p3f32.f32(float addrspace(3)* %fp3, float %f); + + ; CHECK: st.release.cta.f32 + tail call void @llvm.nvvm.st.gen.f.release.cta.p0f32.f32(float* %fp, float %f); + + ; CHECK: st.release.cta.global.f32 + tail call void @llvm.nvvm.st.global.f.release.cta.p1f32.f32(float addrspace(1)* %fp1, float %f); + + ; CHECK: st.release.cta.shared.f32 + tail call void @llvm.nvvm.st.shared.f.release.cta.p3f32.f32(float addrspace(3)* %fp3, float %f); + + ; CHECK: st.relaxed.gpu.f64 + tail call void @llvm.nvvm.st.gen.f.p0f64.f64(double* %dfp, double %df); + + ; CHECK: st.relaxed.gpu.global.f64 + tail call void @llvm.nvvm.st.global.f.p1f64.f64(double addrspace(1)* %dfp1, double %df); + + ; CHECK: st.relaxed.gpu.shared.f64 + tail call void @llvm.nvvm.st.shared.f.p3f64.f64(double addrspace(3)* %dfp3, double %df); + + ; CHECK: st.release.gpu.f64 + tail call void @llvm.nvvm.st.gen.f.release.p0f64.f64(double* %dfp, double %df); + + ; CHECK: st.release.gpu.global.f64 + tail call void @llvm.nvvm.st.global.f.release.p1f64.f64(double addrspace(1)* %dfp1, double %df); + + ; CHECK: st.release.gpu.shared.f64 + tail call void @llvm.nvvm.st.shared.f.release.p3f64.f64(double addrspace(3)* %dfp3, double %df); + + ; CHECK: st.relaxed.sys.f64 + tail call void @llvm.nvvm.st.gen.f.sys.p0f64.f64(double* %dfp, double %df); + + ; CHECK: st.relaxed.sys.global.f64 + tail call void @llvm.nvvm.st.global.f.sys.p1f64.f64(double addrspace(1)* %dfp1, double %df); + + ; CHECK: st.relaxed.sys.shared.f64 + tail call void @llvm.nvvm.st.shared.f.sys.p3f64.f64(double addrspace(3)* %dfp3, double %df); + + ; CHECK: st.release.sys.f64 + tail call void @llvm.nvvm.st.gen.f.release.sys.p0f64.f64(double* %dfp, double %df); + + ; CHECK: st.release.sys.global.f64 + tail call void @llvm.nvvm.st.global.f.release.sys.p1f64.f64(double addrspace(1)* %dfp1, double %df); + + ; CHECK: st.release.sys.shared.f64 + tail call void @llvm.nvvm.st.shared.f.release.sys.p3f64.f64(double addrspace(3)* %dfp3, double %df); + + ; CHECK: st.relaxed.cta.f64 + tail call void @llvm.nvvm.st.gen.f.cta.p0f64.f64(double* %dfp, double %df); + + ; CHECK: st.relaxed.cta.global.f64 + tail call void @llvm.nvvm.st.global.f.cta.p1f64.f64(double addrspace(1)* %dfp1, double %df); + + ; CHECK: st.relaxed.cta.shared.f64 + tail call void @llvm.nvvm.st.shared.f.cta.p3f64.f64(double addrspace(3)* %dfp3, double %df); + + ; CHECK: st.release.cta.f64 + tail call void @llvm.nvvm.st.gen.f.release.cta.p0f64.f64(double* %dfp, double %df); + + ; CHECK: st.release.cta.global.f64 + tail call void @llvm.nvvm.st.global.f.release.cta.p1f64.f64(double addrspace(1)* %dfp1, double %df); + + ; CHECK: st.release.cta.shared.f64 + tail call void @llvm.nvvm.st.shared.f.release.cta.p3f64.f64(double addrspace(3)* %dfp3, double %df); + + ; CHECK: ld.volatile.s32 + %tmpldst144 = tail call i32 @llvm.nvvm.ld.gen.i.volatile.i32.p0(i32* %ip); + + ; CHECK: ld.volatile.global.s32 + %tmpldst145 = tail call i32 @llvm.nvvm.ld.global.i.volatile.i32.p1(i32 addrspace(1)* %ip1); + + ; CHECK: ld.volatile.shared.s32 + %tmpldst146 = tail call i32 @llvm.nvvm.ld.shared.i.volatile.i32.p3(i32 addrspace(3)* %ip3); + + ; CHECK: ld.volatile.s64 + %tmpldst147 = tail call i64 @llvm.nvvm.ld.gen.i.volatile.i64.p0(i64* %llp); + + ; CHECK: ld.volatile.global.s64 + %tmpldst148 = tail call i64 @llvm.nvvm.ld.global.i.volatile.i64.p1(i64 addrspace(1)* %llp1); + + ; CHECK: ld.volatile.shared.s64 + %tmpldst149 = tail call i64 @llvm.nvvm.ld.shared.i.volatile.i64.p3(i64 addrspace(3)* %llp3); + + ; CHECK: ld.volatile.f32 + %tmpldst150 = tail call float @llvm.nvvm.ld.gen.f.volatile.f32.p0(float* %fp); + + ; CHECK: ld.volatile.global.f32 + %tmpldst151 = tail call float @llvm.nvvm.ld.global.f.volatile.f32.p1(float addrspace(1)* %fp1); + + ; CHECK: ld.volatile.shared.f32 + %tmpldst152 = tail call float @llvm.nvvm.ld.shared.f.volatile.f32.p3(float addrspace(3)* %fp3); + + ; CHECK: ld.volatile.f64 + %tmpldst153 = tail call double @llvm.nvvm.ld.gen.f.volatile.f64.p0(double* %dfp); + + ; CHECK: ld.volatile.global.f64 + %tmpldst154 = tail call double @llvm.nvvm.ld.global.f.volatile.f64.p1(double addrspace(1)* %dfp1); + + ; CHECK: ld.volatile.shared.f64 + %tmpldst155 = tail call double @llvm.nvvm.ld.shared.f.volatile.f64.p3(double addrspace(3)* %dfp3); + + ; CHECK: st.volatile.s32 + tail call void @llvm.nvvm.st.gen.i.volatile.p0i32.i32(i32* %ip, i32 %i); + + ; CHECK: st.volatile.global.s32 + tail call void @llvm.nvvm.st.global.i.volatile.p1i32.i32(i32 addrspace(1)* %ip1, i32 %i); + + ; CHECK: st.volatile.shared.s32 + tail call void @llvm.nvvm.st.shared.i.volatile.p3i32.i32(i32 addrspace(3)* %ip3, i32 %i); + + ; CHECK: st.volatile.s64 + tail call void @llvm.nvvm.st.gen.i.volatile.p0i64.i64(i64* %llp, i64 %ll); + + ; CHECK: st.volatile.global.s64 + tail call void @llvm.nvvm.st.global.i.volatile.p1i64.i64(i64 addrspace(1)* %llp1, i64 %ll); + + ; CHECK: st.volatile.shared.s64 + tail call void @llvm.nvvm.st.shared.i.volatile.p3i64.i64(i64 addrspace(3)* %llp3, i64 %ll); + + ; CHECK: st.volatile.f32 + tail call void @llvm.nvvm.st.gen.f.volatile.p0f32.f32(float* %fp, float %f); + + ; CHECK: st.volatile.global.f32 + tail call void @llvm.nvvm.st.global.f.volatile.p1f32.f32(float addrspace(1)* %fp1, float %f); + + ; CHECK: st.volatile.shared.f32 + tail call void @llvm.nvvm.st.shared.f.volatile.p3f32.f32(float addrspace(3)* %fp3, float %f); + + ; CHECK: st.volatile.f64 + tail call void @llvm.nvvm.st.gen.f.volatile.p0f64.f64(double* %dfp, double %df); + + ; CHECK: st.volatile.global.f64 + tail call void @llvm.nvvm.st.global.f.volatile.p1f64.f64(double addrspace(1)* %dfp1, double %df); + + ; CHECK: st.volatile.shared.f64 + tail call void @llvm.nvvm.st.shared.f.volatile.p3f64.f64(double addrspace(3)* %dfp3, double %df); ; CHECK: ret ret void @@ -2145,6 +2648,174 @@ define void @test_atomics_scope_imm(float* %fp, float %f, ret void } +declare i32 @llvm.nvvm.ld.gen.i.i32.p0(i32* nocapture) #1 +declare i32 @llvm.nvvm.ld.global.i.i32.p1(i32 addrspace(1)* nocapture) #1 +declare i32 @llvm.nvvm.ld.shared.i.i32.p3(i32 addrspace(3)* nocapture) #1 +declare i32 @llvm.nvvm.ld.gen.i.acquire.i32.p0(i32* nocapture) #1 +declare i32 @llvm.nvvm.ld.global.i.acquire.i32.p1(i32 addrspace(1)* nocapture) #1 +declare i32 @llvm.nvvm.ld.shared.i.acquire.i32.p3(i32 addrspace(3)* nocapture) #1 +declare i32 @llvm.nvvm.ld.gen.i.sys.i32.p0(i32* nocapture) #1 +declare i32 @llvm.nvvm.ld.global.i.sys.i32.p1(i32 addrspace(1)* nocapture) #1 +declare i32 @llvm.nvvm.ld.shared.i.sys.i32.p3(i32 addrspace(3)* nocapture) #1 +declare i32 @llvm.nvvm.ld.gen.i.acquire.sys.i32.p0(i32* nocapture) #1 +declare i32 @llvm.nvvm.ld.global.i.acquire.sys.i32.p1(i32 addrspace(1)* nocapture) #1 +declare i32 @llvm.nvvm.ld.shared.i.acquire.sys.i32.p3(i32 addrspace(3)* nocapture) #1 +declare i32 @llvm.nvvm.ld.gen.i.cta.i32.p0(i32* nocapture) #1 +declare i32 @llvm.nvvm.ld.global.i.cta.i32.p1(i32 addrspace(1)* nocapture) #1 +declare i32 @llvm.nvvm.ld.shared.i.cta.i32.p3(i32 addrspace(3)* nocapture) #1 +declare i32 @llvm.nvvm.ld.gen.i.acquire.cta.i32.p0(i32* nocapture) #1 +declare i32 @llvm.nvvm.ld.global.i.acquire.cta.i32.p1(i32 addrspace(1)* nocapture) #1 +declare i32 @llvm.nvvm.ld.shared.i.acquire.cta.i32.p3(i32 addrspace(3)* nocapture) #1 +declare i64 @llvm.nvvm.ld.gen.i.i64.p0(i64* nocapture) #1 +declare i64 @llvm.nvvm.ld.global.i.i64.p1(i64 addrspace(1)* nocapture) #1 +declare i64 @llvm.nvvm.ld.shared.i.i64.p3(i64 addrspace(3)* nocapture) #1 +declare i64 @llvm.nvvm.ld.gen.i.acquire.i64.p0(i64* nocapture) #1 +declare i64 @llvm.nvvm.ld.global.i.acquire.i64.p1(i64 addrspace(1)* nocapture) #1 +declare i64 @llvm.nvvm.ld.shared.i.acquire.i64.p3(i64 addrspace(3)* nocapture) #1 +declare i64 @llvm.nvvm.ld.gen.i.sys.i64.p0(i64* nocapture) #1 +declare i64 @llvm.nvvm.ld.global.i.sys.i64.p1(i64 addrspace(1)* nocapture) #1 +declare i64 @llvm.nvvm.ld.shared.i.sys.i64.p3(i64 addrspace(3)* nocapture) #1 +declare i64 @llvm.nvvm.ld.gen.i.acquire.sys.i64.p0(i64* nocapture) #1 +declare i64 @llvm.nvvm.ld.global.i.acquire.sys.i64.p1(i64 addrspace(1)* nocapture) #1 +declare i64 @llvm.nvvm.ld.shared.i.acquire.sys.i64.p3(i64 addrspace(3)* nocapture) #1 +declare i64 @llvm.nvvm.ld.gen.i.cta.i64.p0(i64* nocapture) #1 +declare i64 @llvm.nvvm.ld.global.i.cta.i64.p1(i64 addrspace(1)* nocapture) #1 +declare i64 @llvm.nvvm.ld.shared.i.cta.i64.p3(i64 addrspace(3)* nocapture) #1 +declare i64 @llvm.nvvm.ld.gen.i.acquire.cta.i64.p0(i64* nocapture) #1 +declare i64 @llvm.nvvm.ld.global.i.acquire.cta.i64.p1(i64 addrspace(1)* nocapture) #1 +declare i64 @llvm.nvvm.ld.shared.i.acquire.cta.i64.p3(i64 addrspace(3)* nocapture) #1 +declare float @llvm.nvvm.ld.gen.f.f32.p0(float* nocapture) #1 +declare float @llvm.nvvm.ld.global.f.f32.p1(float addrspace(1)* nocapture) #1 +declare float @llvm.nvvm.ld.shared.f.f32.p3(float addrspace(3)* nocapture) #1 +declare float @llvm.nvvm.ld.gen.f.acquire.f32.p0(float* nocapture) #1 +declare float @llvm.nvvm.ld.global.f.acquire.f32.p1(float addrspace(1)* nocapture) #1 +declare float @llvm.nvvm.ld.shared.f.acquire.f32.p3(float addrspace(3)* nocapture) #1 +declare float @llvm.nvvm.ld.gen.f.sys.f32.p0(float* nocapture) #1 +declare float @llvm.nvvm.ld.global.f.sys.f32.p1(float addrspace(1)* nocapture) #1 +declare float @llvm.nvvm.ld.shared.f.sys.f32.p3(float addrspace(3)* nocapture) #1 +declare float @llvm.nvvm.ld.gen.f.acquire.sys.f32.p0(float* nocapture) #1 +declare float @llvm.nvvm.ld.global.f.acquire.sys.f32.p1(float addrspace(1)* nocapture) #1 +declare float @llvm.nvvm.ld.shared.f.acquire.sys.f32.p3(float addrspace(3)* nocapture) #1 +declare float @llvm.nvvm.ld.gen.f.cta.f32.p0(float* nocapture) #1 +declare float @llvm.nvvm.ld.global.f.cta.f32.p1(float addrspace(1)* nocapture) #1 +declare float @llvm.nvvm.ld.shared.f.cta.f32.p3(float addrspace(3)* nocapture) #1 +declare float @llvm.nvvm.ld.gen.f.acquire.cta.f32.p0(float* nocapture) #1 +declare float @llvm.nvvm.ld.global.f.acquire.cta.f32.p1(float addrspace(1)* nocapture) #1 +declare float @llvm.nvvm.ld.shared.f.acquire.cta.f32.p3(float addrspace(3)* nocapture) #1 +declare double @llvm.nvvm.ld.gen.f.f64.p0(double* nocapture) #1 +declare double @llvm.nvvm.ld.global.f.f64.p1(double addrspace(1)* nocapture) #1 +declare double @llvm.nvvm.ld.shared.f.f64.p3(double addrspace(3)* nocapture) #1 +declare double @llvm.nvvm.ld.gen.f.acquire.f64.p0(double* nocapture) #1 +declare double @llvm.nvvm.ld.global.f.acquire.f64.p1(double addrspace(1)* nocapture) #1 +declare double @llvm.nvvm.ld.shared.f.acquire.f64.p3(double addrspace(3)* nocapture) #1 +declare double @llvm.nvvm.ld.gen.f.sys.f64.p0(double* nocapture) #1 +declare double @llvm.nvvm.ld.global.f.sys.f64.p1(double addrspace(1)* nocapture) #1 +declare double @llvm.nvvm.ld.shared.f.sys.f64.p3(double addrspace(3)* nocapture) #1 +declare double @llvm.nvvm.ld.gen.f.acquire.sys.f64.p0(double* nocapture) #1 +declare double @llvm.nvvm.ld.global.f.acquire.sys.f64.p1(double addrspace(1)* nocapture) #1 +declare double @llvm.nvvm.ld.shared.f.acquire.sys.f64.p3(double addrspace(3)* nocapture) #1 +declare double @llvm.nvvm.ld.gen.f.cta.f64.p0(double* nocapture) #1 +declare double @llvm.nvvm.ld.global.f.cta.f64.p1(double addrspace(1)* nocapture) #1 +declare double @llvm.nvvm.ld.shared.f.cta.f64.p3(double addrspace(3)* nocapture) #1 +declare double @llvm.nvvm.ld.gen.f.acquire.cta.f64.p0(double* nocapture) #1 +declare double @llvm.nvvm.ld.global.f.acquire.cta.f64.p1(double addrspace(1)* nocapture) #1 +declare double @llvm.nvvm.ld.shared.f.acquire.cta.f64.p3(double addrspace(3)* nocapture) #1 +declare void @llvm.nvvm.st.gen.i.p0i32.i32(i32* nocapture, i32) #1 +declare void @llvm.nvvm.st.global.i.p1i32.i32(i32 addrspace(1)* nocapture, i32) #1 +declare void @llvm.nvvm.st.shared.i.p3i32.i32(i32 addrspace(3)* nocapture, i32) #1 +declare void @llvm.nvvm.st.gen.i.release.p0i32.i32(i32* nocapture, i32) #1 +declare void @llvm.nvvm.st.global.i.release.p1i32.i32(i32 addrspace(1)* nocapture, i32) #1 +declare void @llvm.nvvm.st.shared.i.release.p3i32.i32(i32 addrspace(3)* nocapture, i32) #1 +declare void @llvm.nvvm.st.gen.i.sys.p0i32.i32(i32* nocapture, i32) #1 +declare void @llvm.nvvm.st.global.i.sys.p1i32.i32(i32 addrspace(1)* nocapture, i32) #1 +declare void @llvm.nvvm.st.shared.i.sys.p3i32.i32(i32 addrspace(3)* nocapture, i32) #1 +declare void @llvm.nvvm.st.gen.i.release.sys.p0i32.i32(i32* nocapture, i32) #1 +declare void @llvm.nvvm.st.global.i.release.sys.p1i32.i32(i32 addrspace(1)* nocapture, i32) #1 +declare void @llvm.nvvm.st.shared.i.release.sys.p3i32.i32(i32 addrspace(3)* nocapture, i32) #1 +declare void @llvm.nvvm.st.gen.i.cta.p0i32.i32(i32* nocapture, i32) #1 +declare void @llvm.nvvm.st.global.i.cta.p1i32.i32(i32 addrspace(1)* nocapture, i32) #1 +declare void @llvm.nvvm.st.shared.i.cta.p3i32.i32(i32 addrspace(3)* nocapture, i32) #1 +declare void @llvm.nvvm.st.gen.i.release.cta.p0i32.i32(i32* nocapture, i32) #1 +declare void @llvm.nvvm.st.global.i.release.cta.p1i32.i32(i32 addrspace(1)* nocapture, i32) #1 +declare void @llvm.nvvm.st.shared.i.release.cta.p3i32.i32(i32 addrspace(3)* nocapture, i32) #1 +declare void @llvm.nvvm.st.gen.i.p0i64.i64(i64* nocapture, i64) #1 +declare void @llvm.nvvm.st.global.i.p1i64.i64(i64 addrspace(1)* nocapture, i64) #1 +declare void @llvm.nvvm.st.shared.i.p3i64.i64(i64 addrspace(3)* nocapture, i64) #1 +declare void @llvm.nvvm.st.gen.i.release.p0i64.i64(i64* nocapture, i64) #1 +declare void @llvm.nvvm.st.global.i.release.p1i64.i64(i64 addrspace(1)* nocapture, i64) #1 +declare void @llvm.nvvm.st.shared.i.release.p3i64.i64(i64 addrspace(3)* nocapture, i64) #1 +declare void @llvm.nvvm.st.gen.i.sys.p0i64.i64(i64* nocapture, i64) #1 +declare void @llvm.nvvm.st.global.i.sys.p1i64.i64(i64 addrspace(1)* nocapture, i64) #1 +declare void @llvm.nvvm.st.shared.i.sys.p3i64.i64(i64 addrspace(3)* nocapture, i64) #1 +declare void @llvm.nvvm.st.gen.i.release.sys.p0i64.i64(i64* nocapture, i64) #1 +declare void @llvm.nvvm.st.global.i.release.sys.p1i64.i64(i64 addrspace(1)* nocapture, i64) #1 +declare void @llvm.nvvm.st.shared.i.release.sys.p3i64.i64(i64 addrspace(3)* nocapture, i64) #1 +declare void @llvm.nvvm.st.gen.i.cta.p0i64.i64(i64* nocapture, i64) #1 +declare void @llvm.nvvm.st.global.i.cta.p1i64.i64(i64 addrspace(1)* nocapture, i64) #1 +declare void @llvm.nvvm.st.shared.i.cta.p3i64.i64(i64 addrspace(3)* nocapture, i64) #1 +declare void @llvm.nvvm.st.gen.i.release.cta.p0i64.i64(i64* nocapture, i64) #1 +declare void @llvm.nvvm.st.global.i.release.cta.p1i64.i64(i64 addrspace(1)* nocapture, i64) #1 +declare void @llvm.nvvm.st.shared.i.release.cta.p3i64.i64(i64 addrspace(3)* nocapture, i64) #1 +declare void @llvm.nvvm.st.gen.f.p0f32.f32(float* nocapture, float) #1 +declare void @llvm.nvvm.st.global.f.p1f32.f32(float addrspace(1)* nocapture, float) #1 +declare void @llvm.nvvm.st.shared.f.p3f32.f32(float addrspace(3)* nocapture, float) #1 +declare void @llvm.nvvm.st.gen.f.release.p0f32.f32(float* nocapture, float) #1 +declare void @llvm.nvvm.st.global.f.release.p1f32.f32(float addrspace(1)* nocapture, float) #1 +declare void @llvm.nvvm.st.shared.f.release.p3f32.f32(float addrspace(3)* nocapture, float) #1 +declare void @llvm.nvvm.st.gen.f.sys.p0f32.f32(float* nocapture, float) #1 +declare void @llvm.nvvm.st.global.f.sys.p1f32.f32(float addrspace(1)* nocapture, float) #1 +declare void @llvm.nvvm.st.shared.f.sys.p3f32.f32(float addrspace(3)* nocapture, float) #1 +declare void @llvm.nvvm.st.gen.f.release.sys.p0f32.f32(float* nocapture, float) #1 +declare void @llvm.nvvm.st.global.f.release.sys.p1f32.f32(float addrspace(1)* nocapture, float) #1 +declare void @llvm.nvvm.st.shared.f.release.sys.p3f32.f32(float addrspace(3)* nocapture, float) #1 +declare void @llvm.nvvm.st.gen.f.cta.p0f32.f32(float* nocapture, float) #1 +declare void @llvm.nvvm.st.global.f.cta.p1f32.f32(float addrspace(1)* nocapture, float) #1 +declare void @llvm.nvvm.st.shared.f.cta.p3f32.f32(float addrspace(3)* nocapture, float) #1 +declare void @llvm.nvvm.st.gen.f.release.cta.p0f32.f32(float* nocapture, float) #1 +declare void @llvm.nvvm.st.global.f.release.cta.p1f32.f32(float addrspace(1)* nocapture, float) #1 +declare void @llvm.nvvm.st.shared.f.release.cta.p3f32.f32(float addrspace(3)* nocapture, float) #1 +declare void @llvm.nvvm.st.gen.f.p0f64.f64(double* nocapture, double) #1 +declare void @llvm.nvvm.st.global.f.p1f64.f64(double addrspace(1)* nocapture, double) #1 +declare void @llvm.nvvm.st.shared.f.p3f64.f64(double addrspace(3)* nocapture, double) #1 +declare void @llvm.nvvm.st.gen.f.release.p0f64.f64(double* nocapture, double) #1 +declare void @llvm.nvvm.st.global.f.release.p1f64.f64(double addrspace(1)* nocapture, double) #1 +declare void @llvm.nvvm.st.shared.f.release.p3f64.f64(double addrspace(3)* nocapture, double) #1 +declare void @llvm.nvvm.st.gen.f.sys.p0f64.f64(double* nocapture, double) #1 +declare void @llvm.nvvm.st.global.f.sys.p1f64.f64(double addrspace(1)* nocapture, double) #1 +declare void @llvm.nvvm.st.shared.f.sys.p3f64.f64(double addrspace(3)* nocapture, double) #1 +declare void @llvm.nvvm.st.gen.f.release.sys.p0f64.f64(double* nocapture, double) #1 +declare void @llvm.nvvm.st.global.f.release.sys.p1f64.f64(double addrspace(1)* nocapture, double) #1 +declare void @llvm.nvvm.st.shared.f.release.sys.p3f64.f64(double addrspace(3)* nocapture, double) #1 +declare void @llvm.nvvm.st.gen.f.cta.p0f64.f64(double* nocapture, double) #1 +declare void @llvm.nvvm.st.global.f.cta.p1f64.f64(double addrspace(1)* nocapture, double) #1 +declare void @llvm.nvvm.st.shared.f.cta.p3f64.f64(double addrspace(3)* nocapture, double) #1 +declare void @llvm.nvvm.st.gen.f.release.cta.p0f64.f64(double* nocapture, double) #1 +declare void @llvm.nvvm.st.global.f.release.cta.p1f64.f64(double addrspace(1)* nocapture, double) #1 +declare void @llvm.nvvm.st.shared.f.release.cta.p3f64.f64(double addrspace(3)* nocapture, double) #1 +declare i32 @llvm.nvvm.ld.gen.i.volatile.i32.p0(i32* nocapture) #1 +declare i32 @llvm.nvvm.ld.global.i.volatile.i32.p1(i32 addrspace(1)* nocapture) #1 +declare i32 @llvm.nvvm.ld.shared.i.volatile.i32.p3(i32 addrspace(3)* nocapture) #1 +declare i64 @llvm.nvvm.ld.gen.i.volatile.i64.p0(i64* nocapture) #1 +declare i64 @llvm.nvvm.ld.global.i.volatile.i64.p1(i64 addrspace(1)* nocapture) #1 +declare i64 @llvm.nvvm.ld.shared.i.volatile.i64.p3(i64 addrspace(3)* nocapture) #1 +declare float @llvm.nvvm.ld.gen.f.volatile.f32.p0(float* nocapture) #1 +declare float @llvm.nvvm.ld.global.f.volatile.f32.p1(float addrspace(1)* nocapture) #1 +declare float @llvm.nvvm.ld.shared.f.volatile.f32.p3(float addrspace(3)* nocapture) #1 +declare double @llvm.nvvm.ld.gen.f.volatile.f64.p0(double* nocapture) #1 +declare double @llvm.nvvm.ld.global.f.volatile.f64.p1(double addrspace(1)* nocapture) #1 +declare double @llvm.nvvm.ld.shared.f.volatile.f64.p3(double addrspace(3)* nocapture) #1 +declare void @llvm.nvvm.st.gen.i.volatile.p0i32.i32(i32* nocapture, i32) #1 +declare void @llvm.nvvm.st.global.i.volatile.p1i32.i32(i32 addrspace(1)* nocapture, i32) #1 +declare void @llvm.nvvm.st.shared.i.volatile.p3i32.i32(i32 addrspace(3)* nocapture, i32) #1 +declare void @llvm.nvvm.st.gen.i.volatile.p0i64.i64(i64* nocapture, i64) #1 +declare void @llvm.nvvm.st.global.i.volatile.p1i64.i64(i64 addrspace(1)* nocapture, i64) #1 +declare void @llvm.nvvm.st.shared.i.volatile.p3i64.i64(i64 addrspace(3)* nocapture, i64) #1 +declare void @llvm.nvvm.st.gen.f.volatile.p0f32.f32(float* nocapture, float) #1 +declare void @llvm.nvvm.st.global.f.volatile.p1f32.f32(float addrspace(1)* nocapture, float) #1 +declare void @llvm.nvvm.st.shared.f.volatile.p3f32.f32(float addrspace(3)* nocapture, float) #1 +declare void @llvm.nvvm.st.gen.f.volatile.p0f64.f64(double* nocapture, double) #1 +declare void @llvm.nvvm.st.global.f.volatile.p1f64.f64(double addrspace(1)* nocapture, double) #1 +declare void @llvm.nvvm.st.shared.f.volatile.p3f64.f64(double addrspace(3)* nocapture, double) #1 declare i32 @llvm.nvvm.atomic.add.gen.i.acquire.i32.p0i32(i32* nocapture, i32) #1 declare i32 @llvm.nvvm.atomic.add.global.i.acquire.i32.p1i32(i32 addrspace(1)* nocapture, i32) #1