intel · zahiraam · Oct 23, 2024 · Oct 23, 2024 · Oct 24, 2024 · Oct 24, 2024
@@ -379,6 +379,11 @@ def err_ppc_impossible_musttail: Error<
 def err_aix_musttail_unsupported: Error<
   "'musttail' attribute is not supported on AIX">;
 
+def warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option : Warning<
+  "floating point accuracy control '%0' conflicts with explicit target "
+  "precision option '%1'">,
+  InGroup<DiagGroup<"accuracy-conflicts-with-explicit-offload-fp32-prec-option">>;
+
 // Source manager
 def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
 def err_file_modified : Error<

@@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
 OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
 OPTION(MathErrno, bool, 1, FPAccuracy)
 OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
+OPTION(OffloadFP32PrecDi, bool, 1, ComplexRange)
+OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDi)
 #undef OPTION
@@ -372,6 +372,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
 ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
 ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
 BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
+LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
+LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
 LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
 LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
 LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")

@@ -1157,6 +1157,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
   NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
   "for complex arithmetic operations">>;
 
+ defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
+   LangOpts<"OffloadFP32PrecDiv">, DefaultTrue,
+   PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
+   "code are required to return correctly rounded results.">,
+   NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
+   "code are not required to return correctly rounded results.">>,
+   Group<f_Group>;
+
+ defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
+   LangOpts<"OffloadFP32PrecSqrt">, DefaultTrue,
+   PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
+   "code are required to return correctly rounded results.">,
+   NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
+   "code are not required to return correctly rounded results.">>,
+   Group<f_Group>;
+
 // OpenCL-only Options
 def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
   Visibility<[ClangOption, CC1Option]>,

@@ -490,29 +490,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
   return Store;
 }
 
-static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
-                                           llvm::Function *FPBuiltinF,
-                                           ArrayRef<Value *> Args,
-                                           unsigned ID) {
-  llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
-  // TODO: Replace AttrList with a single attribute. The call can only have a
-  // single FPAccuracy attribute.
-  llvm::AttributeList AttrList;
-  // "sycl_used_aspects" metadata associated with the call.
-  llvm::Metadata *AspectMD = nullptr;
-  // sincos() doesn't return a value, but it still has a type associated with
-  // it that corresponds to the operand type.
-  CGF.CGM.getFPAccuracyFuncAttributes(
-      Name, AttrList, AspectMD, ID,
-      Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
-  CI->setAttributes(AttrList);
-
-  if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
-    CI->setMetadata("sycl_used_aspects",
-                    llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
-  return CI;
-}
-
 static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
                               unsigned FPIntrinsicID, unsigned IntrinsicID,
                               bool HasAccuracyRequirement) {
@@ -521,13 +498,6 @@ static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
              : CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
 }
 
-static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) {
-  if (!CGF.getLangOpts().FPAccuracyVal.empty())
-    return true;
-  auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str());
-  return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end();
-}
-
 static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
                                     unsigned FPAccuracyIntrinsicID,
                                     unsigned IntrinsicID, llvm::Value *Src0,
@@ -546,7 +516,7 @@ static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
             CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID());
         // Use fpbuiltin intrinsic only when needed.
         Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID,
-                            hasAccuracyRequirement(CGF, Name));
+                            CGF.hasAccuracyRequirement(Name));
       }
     }
   }
@@ -565,8 +535,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(
   Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                       IntrinsicID, Src0, Name);
   if (Func)
-    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
-                                     FPAccuracyIntrinsicID);
+    return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0},
+                                         FPAccuracyIntrinsicID);
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
@@ -590,8 +560,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(
   Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
                                       IntrinsicID, Src0, Name);
   if (Func)
-    return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
-                                     FPAccuracyIntrinsicID);
+    return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1},
+                                         FPAccuracyIntrinsicID);
 
   CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
@@ -24099,6 +24069,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
             .Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
             .Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
             .Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
+            .Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
             .Default(0);
   } else {
     // The function has a clang builtin. Create an attribute for it
@@ -24200,10 +24171,11 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
   // a TU fp-accuracy requested.
   const LangOptions &LangOpts = getLangOpts();
   if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
-      !LangOpts.FPAccuracyVal.empty()) {
+      !LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
+      !LangOpts.OffloadFP32PrecSqrt) {
     llvm::Function *Func =
         CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
-    return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
+    return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs),
                                      FPAccuracyIntrinsicID);
   }
   return nullptr;

@@ -1879,25 +1879,44 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
   // the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs
   // is empty), then set its accuracy from the TU's accuracy value.
   if (!getLangOpts().FPAccuracyFuncMap.empty()) {
+    StringRef FPAccuracyVal;
     auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
     if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
-      StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
-          ID, FuncType, convertFPAccuracy(FuncMapIt->second));
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+        FPAccuracyVal = "2.5";
+      else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
+        FPAccuracyVal = "3.0";
+      else
+        FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
+            ID, FuncType, convertFPAccuracy(FuncMapIt->second));
       assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
       FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
       MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
           Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
     }
   }
-  if (FuncAttrs.attrs().size() == 0)
+  if (FuncAttrs.attrs().size() == 0) {
     if (!getLangOpts().FPAccuracyVal.empty()) {
-      StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
-          ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
+      StringRef FPAccuracyVal;
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
+        FPAccuracyVal = "2.5";
+      else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
+        FPAccuracyVal = "3.0";
+      else
+        FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
+            ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
       assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
       FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
       MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
           Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
+    } else {
+      if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
+        FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
+      } else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
+        FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
+      }
     }
+  }
 }
 
 /// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
@@ -5790,10 +5809,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   // Emit the actual call/invoke instruction.
   llvm::CallBase *CI;
   if (!InvokeDest) {
-    if (!getLangOpts().FPAccuracyFuncMap.empty() ||
-        !getLangOpts().FPAccuracyVal.empty()) {
-      const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
-      if (FD && FD->getNameInfo().getName().isIdentifier()) {
+    const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
+    if (FD && FD->getNameInfo().getName().isIdentifier()) {
+      StringRef FuncName = FD->getName();
+      const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
+      bool hasFPAccuracyFuncMap = hasAccuracyRequirement(FuncName);
+      bool hasFPAccuracyVal = !getLangOpts().FPAccuracyVal.empty();
+      bool isFp32SqrtFunction =
+          (FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
+           IsFloat32Type);
+      if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) {
         CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
                                     FD->getName(), FD->getBuiltinID());
         if (CI)

@@ -3783,6 +3783,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
   if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
     llvm::Value *Val;
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
+    if (Ops.LHS->getType()->isFloatTy()) {
+      if (!CGF.getLangOpts().OffloadFP32PrecDiv) {
+        unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv;
+        llvm::Function *Func =
+            CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType());
+        llvm::Value *Val = CGF.CreateBuiltinCallWithAttr(
+            "fdiv", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID);
+        return Val;
+      }
+    }
     Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
     CGF.SetDivFPAccuracy(Val);
     return Val;

@@ -122,6 +122,35 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
   }
 }
 
+bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
+  if (!getLangOpts().FPAccuracyVal.empty())
+    return true;
+  auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
+  return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end();
+}
+
+llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr(
+    StringRef Name, llvm::Function *FPBuiltinF, ArrayRef<llvm::Value *> Args,
+    unsigned ID) {
+  llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args);
+  // TODO: Replace AttrList with a single attribute. The call can only have a
+  // single FPAccuracy attribute.
+  llvm::AttributeList AttrList;
+  // "sycl_used_aspects" metadata associated with the call.
+  llvm::Metadata *AspectMD = nullptr;
+  // sincos() doesn't return a value, but it still has a type associated with
+  // it that corresponds to the operand type.
+  CGM.getFPAccuracyFuncAttributes(
+      Name, AttrList, AspectMD, ID,
+      Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
+  CI->setAttributes(AttrList);
+
+  if (getLangOpts().SYCLIsDevice && AspectMD)
+    CI->setMetadata("sycl_used_aspects",
+                    llvm::MDNode::get(CGM.getLLVMContext(), AspectMD));
+  return CI;
+}
+
 void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
   llvm::FastMathFlags FMF;
   FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());

@@ -5213,6 +5213,13 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// CodeGenOpts.
   void SetDivFPAccuracy(llvm::Value *Val);
 
+  bool hasAccuracyRequirement(StringRef Name);
+
+  llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name,
+                                            llvm::Function *FPBuiltinF,
+                                            ArrayRef<llvm::Value *> Args,
+                                            unsigned ID);
+
   /// Set the codegen fast-math flags.
   void SetFastMathFlags(FPOptions FPFeatures);