Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. #15836

Open
wants to merge 25 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f8caf83
Add support for -ftarget-prec-div/sqrt options.
zahiraam Oct 23, 2024
00ffb5a
Added fast-math run lines to LIT tests.
zahiraam Oct 23, 2024
795dd38
Renamed the options accordingly.
zahiraam Oct 24, 2024
78a9005
Fix format.
zahiraam Oct 24, 2024
50e71c0
Changed the place where the options are added in order for the options
zahiraam Oct 28, 2024
54f2409
Fix format.
zahiraam Oct 28, 2024
bdf78d7
Addresed review comments.
zahiraam Oct 29, 2024
8cd6d8b
Put the code to handle the options in RenderFloatingPointOptions
zahiraam Oct 30, 2024
755d630
Addressed review comments.
zahiraam Oct 30, 2024
27011c8
Fixed up condition to clearer code.
zahiraam Oct 31, 2024
ff2b3d9
Addressed review comments.
zahiraam Nov 4, 2024
07752e2
Add extension SPV_INTEL_fp_max_error.
zahiraam Nov 5, 2024
aa909d2
Fixed LIT test.
zahiraam Nov 5, 2024
fcc4786
Addressed review comment.
zahiraam Nov 5, 2024
24711fd
Addressed review comments.
zahiraam Nov 8, 2024
56314b7
Renamed function.
zahiraam Nov 12, 2024
e643027
Addressed review comments.
zahiraam Nov 13, 2024
b25e5ac
Changed SplitFPAccuracyVal to be a static function instead of a lambda.
zahiraam Nov 13, 2024
ce00296
Restricting the use of the options to sycl only.
zahiraam Nov 15, 2024
bc01759
Remove restriction on Cuda/Hip and changed the code so that the div
zahiraam Nov 18, 2024
c5fffc5
Removed unused lines in CodeGenSYC/offload-fp32-div-sqrt.cpp.
zahiraam Nov 21, 2024
f2fb8b2
Renamed div to fdiv to avoid confusion.
zahiraam Nov 22, 2024
83c9b31
This is an attempt to fix the DeviceLib/cmath_test.cpp issue.
zahiraam Nov 25, 2024
0efc825
Removing the latest change that attempted to fix the LIT issue.
zahiraam Dec 2, 2024
e1de775
Merge remote-tracking branch 'origin/sycl' into TargetPrecOption
zahiraam Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticCommonKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,11 @@ def err_ppc_impossible_musttail: Error<
def err_aix_musttail_unsupported: Error<
"'musttail' attribute is not supported on AIX">;

def warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option : Warning<
"floating point accuracy control '%0' conflicts with explicit target "
"precision option '%1'">,
InGroup<DiagGroup<"accuracy-conflicts-with-explicit-offload-fp32-prec-option">>;

// Source manager
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
def err_file_modified : Error<
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/FPOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
OPTION(MathErrno, bool, 1, FPAccuracy)
OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
OPTION(OffloadFP32PrecDi, bool, 1, ComplexRange)
OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDi)
#undef OPTION
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")
Expand Down
16 changes: 16 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
"for complex arithmetic operations">>;

defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFP32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFP32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

// OpenCL-only Options
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
Visibility<[ClangOption, CC1Option]>,
Expand Down
46 changes: 9 additions & 37 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,29 +490,6 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
return Store;
}

static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name,
llvm::Function *FPBuiltinF,
ArrayRef<Value *> Args,
unsigned ID) {
llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args);
// TODO: Replace AttrList with a single attribute. The call can only have a
// single FPAccuracy attribute.
llvm::AttributeList AttrList;
// "sycl_used_aspects" metadata associated with the call.
llvm::Metadata *AspectMD = nullptr;
// sincos() doesn't return a value, but it still has a type associated with
// it that corresponds to the operand type.
CGF.CGM.getFPAccuracyFuncAttributes(
Name, AttrList, AspectMD, ID,
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
CI->setAttributes(AttrList);

if (CGF.getLangOpts().SYCLIsDevice && AspectMD)
CI->setMetadata("sycl_used_aspects",
llvm::MDNode::get(CGF.CGM.getLLVMContext(), AspectMD));
return CI;
}

static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
unsigned FPIntrinsicID, unsigned IntrinsicID,
bool HasAccuracyRequirement) {
Expand All @@ -521,13 +498,6 @@ static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
: CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
}

static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) {
if (!CGF.getLangOpts().FPAccuracyVal.empty())
return true;
auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str());
return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end();
}

static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
unsigned FPAccuracyIntrinsicID,
unsigned IntrinsicID, llvm::Value *Src0,
Expand All @@ -546,7 +516,7 @@ static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID());
// Use fpbuiltin intrinsic only when needed.
Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID,
hasAccuracyRequirement(CGF, Name));
CGF.hasAccuracyRequirement(Name));
}
}
}
Expand All @@ -565,8 +535,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
IntrinsicID, Src0, Name);
if (Func)
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0},
FPAccuracyIntrinsicID);
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0},
FPAccuracyIntrinsicID);

CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
if (CGF.Builder.getIsFPConstrained()) {
Expand All @@ -590,8 +560,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(
Function *Func = emitMaybeIntrinsic(CGF, E, FPAccuracyIntrinsicID,
IntrinsicID, Src0, Name);
if (Func)
return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1},
FPAccuracyIntrinsicID);
return CGF.CreateBuiltinCallWithAttr(Name, Func, {Src0, Src1},
FPAccuracyIntrinsicID);

CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
if (CGF.Builder.getIsFPConstrained()) {
Expand Down Expand Up @@ -24099,6 +24069,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
.Default(0);
} else {
// The function has a clang builtin. Create an attribute for it
Expand Down Expand Up @@ -24200,10 +24171,11 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
// a TU fp-accuracy requested.
const LangOptions &LangOpts = getLangOpts();
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
!LangOpts.FPAccuracyVal.empty()) {
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
!LangOpts.OffloadFP32PrecSqrt) {
llvm::Function *Func =
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
return CreateBuiltinCallWithAttr(Name, Func, ArrayRef(IRArgs),
FPAccuracyIntrinsicID);
}
return nullptr;
Expand Down
43 changes: 34 additions & 9 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1879,25 +1879,44 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
// the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs
// is empty), then set its accuracy from the TU's accuracy value.
if (!getLangOpts().FPAccuracyFuncMap.empty()) {
StringRef FPAccuracyVal;
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
FPAccuracyVal = "2.5";
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
FPAccuracyVal = "3.0";
else
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
}
}
if (FuncAttrs.attrs().size() == 0)
if (FuncAttrs.attrs().size() == 0) {
if (!getLangOpts().FPAccuracyVal.empty()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
StringRef FPAccuracyVal;
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
FPAccuracyVal = "2.5";
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
FPAccuracyVal = "3.0";
else
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
} else {
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
} else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
}
}
}
}

/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
Expand Down Expand Up @@ -5790,10 +5809,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Emit the actual call/invoke instruction.
llvm::CallBase *CI;
if (!InvokeDest) {
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
!getLangOpts().FPAccuracyVal.empty()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
StringRef FuncName = FD->getName();
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
bool hasFPAccuracyFuncMap = hasAccuracyRequirement(FuncName);
bool hasFPAccuracyVal = !getLangOpts().FPAccuracyVal.empty();
bool isFp32SqrtFunction =
(FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we compare with un-mangled sqrt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FuncName is the output of FD->getName() which returns a simple identifier. https://github.com/intel/llvm/blob/sycl/clang/include/clang/AST/Decl.h#L280

Copy link
Contributor

@MrSidims MrSidims Nov 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp will pass even with extern "C" removed from sqrt function declaration?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the user has a function in their own namespace that happens to be named "sqrt"?

IsFloat32Type);
if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction) {
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
FD->getName(), FD->getBuiltinID());
if (CI)
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CodeGen/CGExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3783,6 +3783,16 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
if (Ops.LHS->getType()->isFPOrFPVectorTy()) {
llvm::Value *Val;
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
if (Ops.LHS->getType()->isFloatTy()) {
if (!CGF.getLangOpts().OffloadFP32PrecDiv) {
unsigned FPAccuracyIntrinsicID = llvm::Intrinsic::fpbuiltin_fdiv;
llvm::Function *Func =
CGF.CGM.getIntrinsic(FPAccuracyIntrinsicID, Ops.LHS->getType());
llvm::Value *Val = CGF.CreateBuiltinCallWithAttr(
"fdiv", Func, {Ops.LHS, Ops.RHS}, FPAccuracyIntrinsicID);
return Val;
}
}
Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
CGF.SetDivFPAccuracy(Val);
return Val;
Expand Down
29 changes: 29 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,35 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
}
}

bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
if (!getLangOpts().FPAccuracyVal.empty())
return true;
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
return FuncMapIt != getLangOpts().FPAccuracyFuncMap.end();
}

llvm::CallInst *CodeGenFunction::CreateBuiltinCallWithAttr(
StringRef Name, llvm::Function *FPBuiltinF, ArrayRef<llvm::Value *> Args,
unsigned ID) {
llvm::CallInst *CI = Builder.CreateCall(FPBuiltinF, Args);
// TODO: Replace AttrList with a single attribute. The call can only have a
// single FPAccuracy attribute.
llvm::AttributeList AttrList;
// "sycl_used_aspects" metadata associated with the call.
llvm::Metadata *AspectMD = nullptr;
// sincos() doesn't return a value, but it still has a type associated with
// it that corresponds to the operand type.
CGM.getFPAccuracyFuncAttributes(
Name, AttrList, AspectMD, ID,
Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType());
CI->setAttributes(AttrList);

if (getLangOpts().SYCLIsDevice && AspectMD)
CI->setMetadata("sycl_used_aspects",
llvm::MDNode::get(CGM.getLLVMContext(), AspectMD));
return CI;
}

void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
llvm::FastMathFlags FMF;
FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -5213,6 +5213,13 @@ class CodeGenFunction : public CodeGenTypeCache {
/// CodeGenOpts.
void SetDivFPAccuracy(llvm::Value *Val);

bool hasAccuracyRequirement(StringRef Name);

llvm::CallInst *CreateBuiltinCallWithAttr(StringRef Name,
llvm::Function *FPBuiltinF,
ArrayRef<llvm::Value *> Args,
unsigned ID);

/// Set the codegen fast-math flags.
void SetFastMathFlags(FPOptions FPFeatures);

Expand Down
Loading
Loading