Skip to content

Port SYCL tests to OpenMP for AddressSanitizer #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 23 commits into
base: gpu_san
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kern
LANGOPT(GPUDeferDiag, 1, 0, "defer host/device related diagnostic messages for CUDA/HIP")
LANGOPT(GPUExcludeWrongSideOverloads, 1, 0, "always exclude wrong side overloads in overloading resolution for CUDA/HIP")
LANGOPT(OffloadingNewDriver, 1, 0, "use the new driver for generating offloading code.")
LANGOPT(OffloadViaLLVM, 1, 0, "target LLVM/Offload as portable offloading runtime.")

LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/Sanitizers.def
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ SANITIZER_GROUP("bounds", Bounds, ArrayBounds | LocalBounds)
// Scudo hardened allocator
SANITIZER("scudo", Scudo)

// LLVM/Offload sanitizer
SANITIZER("offload", Offload)

// Magic group, containing all sanitizers. For example, "-fno-sanitize=all"
// can be used to disable all the sanitizers.
SANITIZER_GROUP("all", All, ~SanitizerMask())
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,12 @@ def no_offload_compress : Flag<["--"], "no-offload-compress">;
def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
Flags<[HelpHidden]>,
HelpText<"Compression level for offload device binaries (HIP only)">;

defm offload_via_llvm : BoolFOption<"offload-via-llvm",
LangOpts<"OffloadViaLLVM">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Use">,
NegFlag<SetFalse, [], [ClangOption], "Don't use">,
BothFlags<[], [ClangOption], " LLVM/Offload as portable offloading runtime.">>;
}

// CUDA options
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/SanitizerArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ class SanitizerArgs {

bool needsMemProfRt() const { return NeedsMemProfRt; }
bool needsAsanRt() const { return Sanitizers.has(SanitizerKind::Address); }
bool needsOffloadKernels() const {
return Sanitizers.has(SanitizerKind::Offload);
}
bool needsHwasanRt() const {
return Sanitizers.has(SanitizerKind::HWAddress);
}
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Instrumentation/GPUSan.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/KCFI.h"
Expand Down
97 changes: 82 additions & 15 deletions clang/lib/CodeGen/CGCUDANV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#include "CGCXXABI.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/Cuda.h"
#include "clang/CodeGen/CodeGenABITypes.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
Expand All @@ -36,6 +38,11 @@ constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"

class CGNVCUDARuntime : public CGCUDARuntime {

/// The prefix used for function calls and section names (CUDA, HIP, LLVM)
StringRef Prefix;
/// TODO: We should transition the OpenMP section to LLVM/Offload
StringRef SectionPrefix;

private:
llvm::IntegerType *IntTy, *SizeTy;
llvm::Type *VoidTy;
Expand Down Expand Up @@ -132,6 +139,9 @@ class CGNVCUDARuntime : public CGCUDARuntime {
return DummyFunc;
}

Address prepareKernelArgs(CodeGenFunction &CGF, FunctionArgList &Args);
Address prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
FunctionArgList &Args);
void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
std::string getDeviceSideName(const NamedDecl *ND) override;
Expand Down Expand Up @@ -191,15 +201,11 @@ class CGNVCUDARuntime : public CGCUDARuntime {
} // end anonymous namespace

std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
if (CGM.getLangOpts().HIP)
return ((Twine("hip") + Twine(FuncName)).str());
return ((Twine("cuda") + Twine(FuncName)).str());
return (Prefix + FuncName).str();
}
std::string
CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
if (CGM.getLangOpts().HIP)
return ((Twine("__hip") + Twine(FuncName)).str());
return ((Twine("__cuda") + Twine(FuncName)).str());
return ("__" + Prefix + FuncName).str();
}

static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) {
Expand Down Expand Up @@ -227,6 +233,14 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
SizeTy = CGM.SizeTy;
VoidTy = CGM.VoidTy;
PtrTy = CGM.UnqualPtrTy;

if (CGM.getLangOpts().OffloadViaLLVM) {
Prefix = "llvm";
SectionPrefix = "omp";
} else if (CGM.getLangOpts().HIP)
SectionPrefix = Prefix = "hip";
else
SectionPrefix = Prefix = "cuda";
}

llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
Expand Down Expand Up @@ -305,18 +319,58 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
}
if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
CudaFeature::CUDA_USES_NEW_LAUNCH) ||
(CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
(CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI) ||
(CGF.getLangOpts().OffloadViaLLVM))
emitDeviceStubBodyNew(CGF, Args);
else
emitDeviceStubBodyLegacy(CGF, Args);
}

// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local
// array and kernels are launched using cudaLaunchKernel().
void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
FunctionArgList &Args) {
// Build the shadow stack entry at the very start of the function.
/// CUDA passes the arguments with a level of indirection. For example, a
/// (void*, short, void*) is passed as {void **, short *, void **} to the launch
/// function. For the LLVM/offload launch we flatten the arguments into the
/// struct directly. In addition, we include the size of the arguments, thus
/// pass {sizeof({void *, short, void *}), ptr to {void *, short, void *},
/// nullptr}. The last nullptr needs to be initialized to an array of pointers
/// pointing to the arguments if we want to offload to the host.
Address CGNVCUDARuntime::prepareKernelArgsLLVMOffload(CodeGenFunction &CGF,
FunctionArgList &Args) {
SmallVector<llvm::Type *> ArgTypes, KernelLaunchParamsTypes;
for (auto &Arg : Args)
ArgTypes.push_back(CGF.ConvertTypeForMem(Arg->getType()));
llvm::StructType *KernelArgsTy = llvm::StructType::create(ArgTypes);

auto *Int64Ty = CGF.Builder.getInt64Ty();
KernelLaunchParamsTypes.push_back(Int64Ty);
KernelLaunchParamsTypes.push_back(PtrTy);
KernelLaunchParamsTypes.push_back(PtrTy);

llvm::StructType *KernelLaunchParamsTy =
llvm::StructType::create(KernelLaunchParamsTypes);
Address KernelArgs = CGF.CreateTempAllocaWithoutCast(
KernelArgsTy, CharUnits::fromQuantity(16), "kernel_args");
Address KernelLaunchParams = CGF.CreateTempAllocaWithoutCast(
KernelLaunchParamsTy, CharUnits::fromQuantity(16),
"kernel_launch_params");

auto KernelArgsSize = CGM.getDataLayout().getTypeAllocSize(KernelArgsTy);
CGF.Builder.CreateStore(llvm::ConstantInt::get(Int64Ty, KernelArgsSize),
CGF.Builder.CreateStructGEP(KernelLaunchParams, 0));
CGF.Builder.CreateStore(KernelArgs.emitRawPointer(CGF),
CGF.Builder.CreateStructGEP(KernelLaunchParams, 1));
CGF.Builder.CreateStore(llvm::Constant::getNullValue(PtrTy),
CGF.Builder.CreateStructGEP(KernelLaunchParams, 2));

for (unsigned i = 0; i < Args.size(); ++i) {
auto *ArgVal = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(Args[i]));
CGF.Builder.CreateStore(ArgVal, CGF.Builder.CreateStructGEP(KernelArgs, i));
}

return KernelLaunchParams;
}

Address CGNVCUDARuntime::prepareKernelArgs(CodeGenFunction &CGF,
FunctionArgList &Args) {
// Calculate amount of space we will need for all arguments. If we have no
// args, allocate a single pointer so we still have a valid pointer to the
// argument array that we can pass to runtime, even if it will be unused.
Expand All @@ -331,6 +385,17 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
VoidVarPtr, CGF.Builder.CreateConstGEP1_32(
PtrTy, KernelArgs.emitRawPointer(CGF), i));
}
return KernelArgs;
}

// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local
// array and kernels are launched using cudaLaunchKernel().
void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
FunctionArgList &Args) {
// Build the shadow stack entry at the very start of the function.
Address KernelArgs = CGF.getLangOpts().OffloadViaLLVM
? prepareKernelArgsLLVMOffload(CGF, Args)
: prepareKernelArgs(CGF, Args);

llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");

Expand Down Expand Up @@ -1129,8 +1194,9 @@ void CGNVCUDARuntime::transformManagedVars() {
// registered. The linker will provide a pointer to this section so we can
// register the symbols with the linked device image.
void CGNVCUDARuntime::createOffloadingEntries() {
StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries"
: "cuda_offloading_entries";
SmallVector<char, 32> Out;
StringRef Section = (SectionPrefix + "_offloading_entries").toStringRef(Out);

llvm::Module &M = CGM.getModule();
for (KernelInfo &I : EmittedKernels)
llvm::offloading::emitOffloadingEntry(
Expand Down Expand Up @@ -1199,7 +1265,8 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() {
}
return nullptr;
}
if (CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode)
if (CGM.getLangOpts().OffloadViaLLVM ||
(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
createOffloadingEntries();
else
return makeModuleCtorFunction();
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGDeclCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction(
!isInNoSanitizeList(SanitizerKind::Address, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeAddress);

if (getLangOpts().Sanitize.has(SanitizerKind::Offload) &&
!isInNoSanitizeList(SanitizerKind::Offload, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeAddress);

if (getLangOpts().Sanitize.has(SanitizerKind::KernelAddress) &&
!isInNoSanitizeList(SanitizerKind::KernelAddress, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
Expand Down
9 changes: 7 additions & 2 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "clang/AST/StmtObjC.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Sanitizers.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
Expand Down Expand Up @@ -67,7 +68,8 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
// Sanitizers may use markers.
if (CGOpts.SanitizeAddressUseAfterScope ||
LangOpts.Sanitize.has(SanitizerKind::HWAddress) ||
LangOpts.Sanitize.has(SanitizerKind::Memory))
LangOpts.Sanitize.has(SanitizerKind::Memory) ||
LangOpts.Sanitize.has(SanitizerKind::Offload))
return true;

// For now, only in optimized builds.
Expand Down Expand Up @@ -791,6 +793,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
SanOpts.set(SanitizerKind::KernelHWAddress, false);
if (no_sanitize_mask & SanitizerKind::KernelHWAddress)
SanOpts.set(SanitizerKind::HWAddress, false);
if (no_sanitize_mask & SanitizerKind::Offload)
SanOpts.set(SanitizerKind::Offload, false);

if (SanitizeBounds && !SanOpts.hasOneOf(SanitizerKind::Bounds))
Fn->addFnAttr(llvm::Attribute::NoSanitizeBounds);
Expand All @@ -809,7 +813,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
CurFn->addFnAttr(llvm::Attribute::DisableSanitizerInstrumentation);
} else {
// Apply sanitizer attributes to the function.
if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress))
if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress |
SanitizerKind::Offload))
Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
if (SanOpts.hasOneOf(SanitizerKind::HWAddress |
SanitizerKind::KernelHWAddress))
Expand Down
19 changes: 12 additions & 7 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -792,11 +792,13 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
}) ||
C.getInputArgs().hasArg(options::OPT_hip_link) ||
C.getInputArgs().hasArg(options::OPT_hipstdpar);
bool UseLLVMOffload = C.getInputArgs().hasArg(
options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false);
if (IsCuda && IsHIP) {
Diag(clang::diag::err_drv_mix_cuda_hip);
return;
}
if (IsCuda) {
if (IsCuda && !UseLLVMOffload) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
auto OFK = Action::OFK_Cuda;
Expand All @@ -818,7 +820,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
CudaInstallation.WarnIfUnsupportedVersion();
}
C.addOffloadDeviceToolChain(CudaTC.get(), OFK);
} else if (IsHIP) {
} else if (IsHIP && !UseLLVMOffload) {
if (auto *OMPTargetArg =
C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
Diag(clang::diag::err_drv_unsupported_opt_for_language_mode)
Expand All @@ -842,10 +844,11 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
// We need to generate an OpenMP toolchain if the user specified targets with
// the -fopenmp-targets option or used --offload-arch with OpenMP enabled.
bool IsOpenMPOffloading =
C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false) &&
(C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) ||
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ));
((IsCuda || IsHIP) && UseLLVMOffload) ||
(C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false) &&
(C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) ||
C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)));
if (IsOpenMPOffloading) {
// We expect that -fopenmp-targets is always used in conjunction with the
// option -fopenmp specifying a valid runtime with offloading support, i.e.
Expand Down Expand Up @@ -873,7 +876,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
for (StringRef T : OpenMPTargets->getValues())
OpenMPTriples.insert(T);
} else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
!IsHIP && !IsCuda) {
((!IsHIP && !IsCuda) || UseLLVMOffload)) {
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(),
Expand Down Expand Up @@ -4146,6 +4149,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,

bool UseNewOffloadingDriver =
C.isOffloadingHostKind(Action::OFK_OpenMP) ||
Args.hasFlag(options::OPT_foffload_via_llvm,
options::OPT_fno_offload_via_llvm, false) ||
Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false);

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1367,7 +1367,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
SanitizerKind::CFICastStrict | SanitizerKind::FloatDivideByZero |
SanitizerKind::KCFI | SanitizerKind::UnsignedIntegerOverflow |
SanitizerKind::UnsignedShiftBase | SanitizerKind::ImplicitConversion |
SanitizerKind::Nullability | SanitizerKind::LocalBounds;
SanitizerKind::Nullability | SanitizerKind::LocalBounds |
SanitizerKind::Offload;
if (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64 ||
getTriple().getArch() == llvm::Triple::arm || getTriple().isWasm() ||
Expand Down
27 changes: 23 additions & 4 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,18 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
CmdArgs.push_back("__clang_openmp_device_functions.h");
}

if (Args.hasArg(options::OPT_foffload_via_llvm)) {
// Add llvm_wrappers/* to our system include path. This lets us wrap
// standard library headers and other headers.
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include", "llvm_offload_wrappers");
CmdArgs.append({"-internal-isystem", Args.MakeArgString(P), "-include"});
if (JA.isDeviceOffloading(Action::OFK_OpenMP))
CmdArgs.push_back("__llvm_offload_device.h");
else
CmdArgs.push_back("__llvm_offload_host.h");
}

// Add -i* options, and automatically translate to
// -include-pch/-include-pth for transparent PCH support. It's
// wonky, but we include looking for .gch so we can support seamless
Expand Down Expand Up @@ -6598,6 +6610,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// device offloading action other than OpenMP.
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false) &&
!Args.hasFlag(options::OPT_foffload_via_llvm,
options::OPT_fno_offload_via_llvm, false) &&
(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_OpenMP))) {
switch (D.getOpenMPRuntime(Args)) {
Expand Down Expand Up @@ -6675,11 +6689,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.addOptOutFlag(CmdArgs, options::OPT_fopenmp_extensions,
options::OPT_fno_openmp_extensions);
}

// Forward the new driver to change offloading code generation.
if (Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false))
// Forward the offload runtime change to code generation, liboffload implies
// new driver. Otherwise, check if we should forward the new driver to change
// offloading code generation.
if (Args.hasFlag(options::OPT_foffload_via_llvm,
options::OPT_fno_offload_via_llvm, false)) {
CmdArgs.append({"--offload-new-driver", "-foffload-via-llvm"});
} else if (Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false)) {
CmdArgs.push_back("--offload-new-driver");
}

SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);

Expand Down
Loading