[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. (intel#1…

…5836) Add support for options `-f[no-]offload-fp32-prec-div` and `-f[no-]offload-fp32-prec-sqrt`. These options are added to allow users to control whether `fdiv` and `sqrt` operations in offload device code are required to return correctly rounded results. In order to communicate this to the device code, we need the front end to generate IR that reflects the choice. When the correctly rounded setting is used, we can just generate the `fdiv` instruction and `llvm.sqrt` intrinsic, because these operations are required to be correctly rounded by default in LLVM IR. When the result is not required to be correctly rounded, the front end should generate a call to the `llvm.fpbuiltin.fdiv` or `llvm.fpbuiltin.sqrt` intrinsic with the `fpbuiltin-max-error` attribute set. For single-precision `fdiv`, the setting should be `2.5`. For single-precision `sqrt`, the setting should be `3.0`. If the `-ffp-accuracy` option is used, we should issue warnings if the settings conflict with an explicitly set `-foffload-fp32-prec-div` or `-foffload-fp32-prec-sqrt` option.
gmlueck · Jan 31, 2025 · 5823125 · 5823125
1 parent 2339bac
commit 5823125
Show file tree

Hide file tree

Showing 14 changed files with 798 additions and 55 deletions.
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -374,6 +374,11 @@ def err_ppc_impossible_musttail: Error<
 def err_aix_musttail_unsupported: Error<
   "'musttail' attribute is not supported on AIX">;
 
+// Warning for an fp-accuracy control (%0) that conflicts with an explicitly
+// given offload fp32 precision option (%1).
+def warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option : Warning<
+  "floating point accuracy control '%0' conflicts with explicit target "
+  "precision option '%1'">,
+  InGroup<DiagGroup<"accuracy-conflicts-with-explicit-offload-fp32-prec-option">>;
+
 // Source manager
 def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
 def err_file_modified : Error<

diff --git a/clang/include/clang/Basic/FPOptions.def b/clang/include/clang/Basic/FPOptions.def
@@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
 OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
 OPTION(MathErrno, bool, 1, FPAccuracy)
 OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
+// Offload fp32 precision controls. The names are spelled the same as the
+// OffloadFP32PrecDiv / OffloadFP32PrecSqrt LANGOPT entries; as with the entries
+// above, the last argument names the option declared immediately before.
+OPTION(OffloadFP32PrecDiv, bool, 1, ComplexRange)
+OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDiv)
 #undef OPTION
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
@@ -377,6 +377,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
 ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
 ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
 BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
+LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
+LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
 LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
 LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
 LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -1167,6 +1167,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
   NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
   "for complex arithmetic operations">>;
 
+ // -f[no-]offload-fp32-prec-div: bound to the OffloadFP32PrecDiv language
+ // option, which defaults to true (correctly rounded fdiv).
+ defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
+   LangOpts<"OffloadFP32PrecDiv">, DefaultTrue,
+   PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
+   "code are required to return correctly rounded results.">,
+   NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
+   "code are not required to return correctly rounded results.">>,
+   Group<f_Group>;
+
+ // -f[no-]offload-fp32-prec-sqrt: bound to the OffloadFP32PrecSqrt language
+ // option, which defaults to true (correctly rounded sqrt).
+ defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
+   LangOpts<"OffloadFP32PrecSqrt">, DefaultTrue,
+   PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
+   "code are required to return correctly rounded results.">,
+   NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
+   "code are not required to return correctly rounded results.">>,
+   Group<f_Group>;
+
 // OpenCL-only Options
 def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
   Visibility<[ClangOption, CC1Option]>,

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -666,29 +666,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
   return Store;
 }
 
-static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
-                                           llvm::Function *FPBuiltinF,
-                                           ArrayRef<Value *> Args,
-                                           unsigned ID) {
-  llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
-  // TODO: Replace AttrList with a single attribute. The call can only have a
-  // single FPAccuracy attribute.
-  llvm::AttributeList AttrList;
-  // "sycl_used_aspects" metadata associated with the call.
-  llvm::Metadata *AspectMD = nullptr;
-  // sincos() doesn't return a value, but it still has a type associated with
-  // it that corresponds to the operand type.
-  CGF.CGM.getFPAccuracyFuncAttributes(
-      Name, AttrList, AspectMD, ID,
-      Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
-  CI->setAttributes(AttrList);
-
-  if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
-    CI->setMetadata("sycl_used_aspects",
-                    llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
-  return CI;
-}
-
 static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
                               unsigned FPIntrinsicID, unsigned IntrinsicID,
                               bool HasAccuracyRequirement) {
@@ -697,13 +674,6 @@ static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
              : CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
 }
 
-static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) {
-  if (!CGF.getLangOpts().FPAccuracyVal.empty())
-    return true;
-  auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str());
-  return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end();
-}
-
 static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
                                     unsigned FPAccuracyIntrinsicID,
                                     unsigned IntrinsicID, llvm::Value *Src0,
@@ -722,7 +692,7 @@ static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
             CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID());
         // Use fpbuiltin intrinsic only when needed.
         Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID,
-                            hasAccuracyRequirement(CGF, Name));
+                            CGF.hasAccuracyRequirement(Name));
       }
     }
   }
@@ -741,8 +711,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(
   Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                       IntrinsicID, Src0, Name);
   if (Func)
-    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
-                                     FPAccuracyIntrinsicID);
+    return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0},
+                                         FPAccuracyIntrinsicID);
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
@@ -766,8 +736,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(
   Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                       IntrinsicID, Src0, Name);
   if (Func)
-    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
-                                     FPAccuracyIntrinsicID);
+    return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1},
+                                         FPAccuracyIntrinsicID);
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
@@ -25194,6 +25164,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
             .Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
             .Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
             .Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
+            .Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
             .Default(0);
   } else {
     // The function has a clang builtin. Create an attribute for it
@@ -25295,10 +25266,11 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
   // a TU fp-accuracy requested.
   const LangOptions &LangOpts = getLangOpts();
   if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
-      !LangOpts.FPAccuracyVal.empty()) {
+      !LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
+      !LangOpts.OffloadFP32PrecSqrt) {
     llvm::Function *Func =
         CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
-    return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
+    return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs),
                                      FPAccuracyIntrinsicID);
   }
   return nullptr;

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
@@ -1902,25 +1902,44 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
   // the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs
   // is empty), then set its accuracy from the TU's accuracy value.
   if (!getLangOpts().FPAccuracyFuncMap.empty()) {
+    StringRef FPAccuracyVal;
     auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
     if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
-      StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
-          ID, FuncType, convertFPAccuracy(FuncMapIt->second));
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+        FPAccuracyVal = "2.5";
+      else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
+        FPAccuracyVal = "3.0";
+      else
+        FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
+            ID, FuncType, convertFPAccuracy(FuncMapIt->second));
       assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
       FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
       MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
           Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
     }
   }
-  if (FuncAttrs.attrs().size() == 0)
+  if (FuncAttrs.attrs().size() == 0) {
     if (!getLangOpts().FPAccuracyVal.empty()) {
-      StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
-          ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
+      StringRef FPAccuracyVal;
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+        FPAccuracyVal = "2.5";
+      else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
+        FPAccuracyVal = "3.0";
+      else
+        FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
+            ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
       assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
       FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
       MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
           Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
+    } else {
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
+        FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
+      } else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
+        FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
+      }
     }
+  }
 }
 
 /// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
@@ -5864,10 +5883,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   // Emit the actual call/invoke instruction.
   llvm::CallBase *CI;
   if (!InvokeDest) {
-    if (!getLangOpts().FPAccuracyFuncMap.empty() ||
-        !getLangOpts().FPAccuracyVal.empty()) {
-      const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
-      if (FD && FD->getNameInfo().getName().isIdentifier()) {
+    const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
+    if (FD && FD->getNameInfo().getName().isIdentifier()) {
+      StringRef FuncName = FD->getName();
+      const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
+      bool hasFPAccuracyFuncMap = hasAccuracyRequirement(FuncName);
+      bool hasFPAccuracyVal = !getLangOpts().FPAccuracyVal.empty();
+      bool isFp32SqrtFunction =
+          (FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
+           IsFloat32Type);
+      if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) {
         CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
                                     FD->getName(), FD->getBuiltinID());
         if (CI)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3866,6 +3866,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
   if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
     llvm::Value *Val;
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
+    if (Ops.LHS->getType()->isFloatTy()) {
+      if (!CGF.getLangOpts().OffloadFP32PrecDiv) {
+        unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv;
+        llvm::Function *Func =
+            CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType());
+        llvm::Value *Val = CGF.CreateBuiltinCallWithAttr(
+            "fdiv", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID);
+        return Val;
+      }
+    }
     Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
     CGF.SetDivFPAccuracy(Val);
     return Val;

diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -121,6 +121,35 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
   }
 }
 
+/// Report whether an fp-accuracy setting applies to the function \p Name.
+///
+/// \returns true when the translation-unit-wide FPAccuracyVal is non-empty, or
+/// when \p Name has its own entry in FPAccuracyFuncMap; false otherwise.
+bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
+  // A non-empty TU-wide value covers every name.
+  if (!getLangOpts().FPAccuracyVal.empty())
+    return true;
+  // Otherwise only names with a per-function entry qualify.
+  auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
+  return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end();
+}
+
+/// Emit a call to \p FPBuiltinF with \p Args and attach the fp-accuracy
+/// attributes that CodeGenModule::getFPAccuracyFuncAttributes produces for
+/// \p Name.
+///
+/// \param Name        Function name used to look up the accuracy attributes.
+/// \param FPBuiltinF  Callee of the emitted call.
+/// \param Args        Arguments passed to the call.
+/// \param ID          Intrinsic ID forwarded to the attribute lookup.
+/// \returns the newly created call instruction.
+llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr(
+    StringRef Name, llvm::Function *FPBuiltinF, ArrayRef<llvm::Value *> Args,
+    unsigned ID) {
+  llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args);
+  // TODO: Replace AttrList with a single attribute. The call can only have a
+  // single FPAccuracy attribute.
+  llvm::AttributeList AttrList;
+  // "sycl_used_aspects" metadata associated with the call.
+  llvm::Metadata *AspectMD = nullptr;
+  // sincos() doesn't return a value, but it still has a type associated with
+  // it that corresponds to the operand type.
+  CGM.getFPAccuracyFuncAttributes(
+      Name, AttrList, AspectMD, ID,
+      Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
+  // Replaces the call's attribute list with the one filled in above.
+  CI->setAttributes(AttrList);
+
+  // The aspect metadata is attached only for SYCL device compilation, and only
+  // when the lookup above produced one.
+  if (getLangOpts().SYCLIsDevice && AspectMD)
+    CI->setMetadata("sycl_used_aspects",
+                    llvm::MDNode::get(CGM.getLLVMContext(), AspectMD));
+  return CI;
+}
+
 void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
   llvm::FastMathFlags FMF;
   FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());

diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5285,6 +5285,13 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// CodeGenOpts.
   void SetDivFPAccuracy(llvm::Value *Val);
 
+  /// Return true if an fp-accuracy value is set for the whole translation
+  /// unit or specifically for the function \p Name.
+  bool hasAccuracyRequirement(StringRef Name);
+
+  /// Create a call to \p FPBuiltinF with \p Args and set on it the fp-accuracy
+  /// attributes looked up for \p Name and intrinsic \p ID.
+  llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name,
+                                            llvm::Function *FPBuiltinF,
+                                            ArrayRef<llvm::Value *> Args,
+                                            unsigned ID);
+
   /// Set the codegen fast-math flags.
   void SetFastMathFlags(FPOptions FPFeatures);