Skip to content

Commit

Permalink
[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. (intel#1…
Browse files Browse the repository at this point in the history
…5836)

Add support for options `-f[no]-offload-fp32-prec-div` and
`-f[no-]-offload-fp32-prec-sqrt`.
These options are added to allow users to control whether `fdiv` and
`sqrt` operations in offload device code are required to return
correctly rounded results. In order to communicate this to the device
code, we need the front end to generate IR that reflects the choice.

When the correctly rounded setting is used, we can just generate the
`fdiv` instruction and `llvm.sqrt` intrinsic, because these operations
are required to be correctly rounded by default in LLVM IR.

When the result is not required to be correctly rounded, the front end
should generate a call to the `llvm.fpbuiltin.fdiv` or
`llvm.fpbuiltin.sqrt` intrinsic with the `fpbuiltin-max-error` attribute
set. For single precision` fdiv`, the setting should be `2.5`. For
single-precision sqrt, the setting should be `3.0`.

If the -ffp-accuracy option is used, we should issue warnings if the
settings conflict with an explicitly set `-foffload-fp32-prec-div` or
`-foffload-fp32-prec-sqrt` option.
  • Loading branch information
zahiraam authored Jan 31, 2025
1 parent 2339bac commit 5823125
Show file tree
Hide file tree
Showing 14 changed files with 798 additions and 55 deletions.
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticCommonKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,11 @@ def err_ppc_impossible_musttail: Error<
def err_aix_musttail_unsupported: Error<
"'musttail' attribute is not supported on AIX">;

def warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option : Warning<
"floating point accuracy control '%0' conflicts with explicit target "
"precision option '%1'">,
InGroup<DiagGroup<"accuracy-conflicts-with-explicit-offload-fp32-prec-option">>;

// Source manager
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
def err_file_modified : Error<
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/FPOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
OPTION(MathErrno, bool, 1, FPAccuracy)
OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
OPTION(OffloadFP32PrecDi, bool, 1, ComplexRange)
OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDi)
#undef OPTION
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")
Expand Down
16 changes: 16 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
"for complex arithmetic operations">>;

defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFP32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFP32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

// OpenCL-only Options
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
Visibility<[ClangOption, CC1Option]>,
Expand Down
46 changes: 9 additions & 37 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,29 +666,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
return Store;
}

static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
llvm::Function *FPBuiltinF,
ArrayRef<Value *> Args,
unsigned ID) {
llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
// TODO: Replace AttrList with a single attribute. The call can only have a
// single FPAccuracy attribute.
llvm::AttributeList AttrList;
// "sycl_used_aspects" metadata associated with the call.
llvm::Metadata *AspectMD = nullptr;
// sincos() doesn't return a value, but it still has a type associated with
// it that corresponds to the operand type.
CGF.CGM.getFPAccuracyFuncAttributes(
Name, AttrList, AspectMD, ID,
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
CI->setAttributes(AttrList);

if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
CI->setMetadata("sycl_used_aspects",
llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
return CI;
}

static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
unsigned FPIntrinsicID, unsigned IntrinsicID,
bool HasAccuracyRequirement) {
Expand All @@ -697,13 +674,6 @@ static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
: CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
}

static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) {
if (!CGF.getLangOpts().FPAccuracyVal.empty())
return true;
auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str());
return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end();
}

static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
unsigned FPAccuracyIntrinsicID,
unsigned IntrinsicID, llvm::Value *Src0,
Expand All @@ -722,7 +692,7 @@ static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID());
// Use fpbuiltin intrinsic only when needed.
Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID,
hasAccuracyRequirement(CGF, Name));
CGF.hasAccuracyRequirement(Name));
}
}
}
Expand All @@ -741,8 +711,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
IntrinsicID, Src0, Name);
if (Func)
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
FPAccuracyIntrinsicID);
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0},
FPAccuracyIntrinsicID);

CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
if (CGF.Builder.getIsFPConstrained()) {
Expand All @@ -766,8 +736,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
IntrinsicID, Src0, Name);
if (Func)
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
FPAccuracyIntrinsicID);
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1},
FPAccuracyIntrinsicID);

CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
if (CGF.Builder.getIsFPConstrained()) {
Expand Down Expand Up @@ -25194,6 +25164,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
.Default(0);
} else {
// The function has a clang builtin. Create an attribute for it
Expand Down Expand Up @@ -25295,10 +25266,11 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
// a TU fp-accuracy requested.
const LangOptions &LangOpts = getLangOpts();
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
!LangOpts.FPAccuracyVal.empty()) {
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
!LangOpts.OffloadFP32PrecSqrt) {
llvm::Function *Func =
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs),
FPAccuracyIntrinsicID);
}
return nullptr;
Expand Down
43 changes: 34 additions & 9 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1902,25 +1902,44 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
// the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs
// is empty), then set its accuracy from the TU's accuracy value.
if (!getLangOpts().FPAccuracyFuncMap.empty()) {
StringRef FPAccuracyVal;
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
FPAccuracyVal = "2.5";
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
FPAccuracyVal = "3.0";
else
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
}
}
if (FuncAttrs.attrs().size() == 0)
if (FuncAttrs.attrs().size() == 0) {
if (!getLangOpts().FPAccuracyVal.empty()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
StringRef FPAccuracyVal;
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
FPAccuracyVal = "2.5";
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
FPAccuracyVal = "3.0";
else
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
} else {
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
} else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
}
}
}
}

/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
Expand Down Expand Up @@ -5864,10 +5883,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Emit the actual call/invoke instruction.
llvm::CallBase *CI;
if (!InvokeDest) {
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
!getLangOpts().FPAccuracyVal.empty()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
StringRef FuncName = FD->getName();
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
bool hasFPAccuracyFuncMap = hasAccuracyRequirement(FuncName);
bool hasFPAccuracyVal = !getLangOpts().FPAccuracyVal.empty();
bool isFp32SqrtFunction =
(FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
IsFloat32Type);
if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) {
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
FD->getName(), FD->getBuiltinID());
if (CI)
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CodeGen/CGExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3866,6 +3866,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
llvm::Value *Val;
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
if (Ops.LHS->getType()->isFloatTy()) {
if (!CGF.getLangOpts().OffloadFP32PrecDiv) {
unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv;
llvm::Function *Func =
CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType());
llvm::Value *Val = CGF.CreateBuiltinCallWithAttr(
"fdiv", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID);
return Val;
}
}
Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
CGF.SetDivFPAccuracy(Val);
return Val;
Expand Down
29 changes: 29 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,35 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
}
}

bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
if (!getLangOpts().FPAccuracyVal.empty())
return true;
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end();
}

llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr(
StringRef Name, llvm::Function *FPBuiltinF, ArrayRef<llvm::Value *> Args,
unsigned ID) {
llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args);
// TODO: Replace AttrList with a single attribute. The call can only have a
// single FPAccuracy attribute.
llvm::AttributeList AttrList;
// "sycl_used_aspects" metadata associated with the call.
llvm::Metadata *AspectMD = nullptr;
// sincos() doesn't return a value, but it still has a type associated with
// it that corresponds to the operand type.
CGM.getFPAccuracyFuncAttributes(
Name, AttrList, AspectMD, ID,
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
CI->setAttributes(AttrList);

if (getLangOpts().SYCLIsDevice && AspectMD)
CI->setMetadata("sycl_used_aspects",
llvm::MDNode::get(CGM.getLLVMContext(), AspectMD));
return CI;
}

void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
llvm::FastMathFlags FMF;
FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -5285,6 +5285,13 @@ class CodeGenFunction : public CodeGenTypeCache {
/// CodeGenOpts.
void SetDivFPAccuracy(llvm::Value *Val);

bool hasAccuracyRequirement(StringRef Name);

llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name,
llvm::Function *FPBuiltinF,
ArrayRef<llvm::Value *> Args,
unsigned ID);

/// Set the codegen fast-math flags.
void SetFastMathFlags(FPOptions FPFeatures);

Expand Down
Loading

0 comments on commit 5823125

Please sign in to comment.