Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Add support for -foffload-fp32-prec-div/sqrt options. #15836

Open
wants to merge 27 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
f8caf83
Add support for -ftarget-prec-div/sqrt options.
zahiraam Oct 23, 2024
00ffb5a
Added fast-math run lines to LIT tests.
zahiraam Oct 23, 2024
795dd38
Renamed the options accordingly.
zahiraam Oct 24, 2024
78a9005
Fix format.
zahiraam Oct 24, 2024
50e71c0
Changed the place where the options are added in order for the options
zahiraam Oct 28, 2024
54f2409
Fix format.
zahiraam Oct 28, 2024
bdf78d7
Addressed review comments.
zahiraam Oct 29, 2024
8cd6d8b
Put the code to handle the options in RenderFloatingPointOptions
zahiraam Oct 30, 2024
755d630
Addressed review comments.
zahiraam Oct 30, 2024
27011c8
Fixed up condition to clearer code.
zahiraam Oct 31, 2024
ff2b3d9
Addressed review comments.
zahiraam Nov 4, 2024
07752e2
Add extension SPV_INTEL_fp_max_error.
zahiraam Nov 5, 2024
aa909d2
Fixed LIT test.
zahiraam Nov 5, 2024
fcc4786
Addressed review comment.
zahiraam Nov 5, 2024
24711fd
Addressed review comments.
zahiraam Nov 8, 2024
56314b7
Renamed function.
zahiraam Nov 12, 2024
e643027
Addressed review comments.
zahiraam Nov 13, 2024
b25e5ac
Changed SplitFPAccuracyVal to be a static function instead of a lambda.
zahiraam Nov 13, 2024
ce00296
Restricting the use of the options to sycl only.
zahiraam Nov 15, 2024
bc01759
Remove restriction on Cuda/Hip and changed the code so that the div
zahiraam Nov 18, 2024
c5fffc5
Removed unused lines in CodeGenSYCL/offload-fp32-div-sqrt.cpp.
zahiraam Nov 21, 2024
f2fb8b2
Renamed div to fdiv to avoid confusion.
zahiraam Nov 22, 2024
83c9b31
This is an attempt to fix the DeviceLib/cmath_test.cpp issue.
zahiraam Nov 25, 2024
0efc825
Removing the latest change that attempted to fix the LIT issue.
zahiraam Dec 2, 2024
e1de775
Merge remote-tracking branch 'origin/sycl' into TargetPrecOption
zahiraam Jan 15, 2025
34f07cc
Merge remote-tracking branch 'origin/sycl' into TargetPrecOption
zahiraam Jan 15, 2025
e18930f
Fix sync error.
zahiraam Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticCommonKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,11 @@ def err_ppc_impossible_musttail: Error<
def err_aix_musttail_unsupported: Error<
"'musttail' attribute is not supported on AIX">;

// Warning issued by the driver when an -ffp-accuracy value (%0) is combined
// with an explicit offload fp32 precision option (%1).
// NOTE(review): the identifier spells "acuracy"; correcting it means updating
// every use of the diagnostic in the same change.
def warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option : Warning<
"floating point accuracy control '%0' conflicts with explicit target "
"precision option '%1'">,
InGroup<DiagGroup<"accuracy-conflicts-with-explicit-offload-fp32-prec-option">>;

// Source manager
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
def err_file_modified : Error<
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/FPOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,6 @@ OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16Exce
OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision)
OPTION(MathErrno, bool, 1, FPAccuracy)
OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
// One-bit FP options backing -f[no-]offload-fp32-prec-div and
// -f[no-]offload-fp32-prec-sqrt. The names are spelled exactly like the
// corresponding LangOptions fields (OffloadFP32PrecDiv / OffloadFP32PrecSqrt);
// the last macro argument names the preceding entry in this list.
OPTION(OffloadFP32PrecDiv, bool, 1, ComplexRange)
OPTION(OffloadFP32PrecSqrt, bool, 1, OffloadFP32PrecDiv)
#undef OPTION
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "
ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic")
ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic")
BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions")
LANGOPT(OffloadFP32PrecDiv, 1, 1, "Return correctly rounded results of fdiv")
LANGOPT(OffloadFP32PrecSqrt, 1, 1, "Return correctly rounded results of sqrt")
LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment")
LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility")
LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting")
Expand Down
16 changes: 16 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,22 @@ defm cx_fortran_rules: BoolOptionWithoutMarshalling<"f", "cx-fortran-rules",
NegFlag<SetFalse, [], [ClangOption, CC1Option], "Range reduction is disabled "
"for complex arithmetic operations">>;

// -foffload-fp32-prec-div / -fno-offload-fp32-prec-div: driver and cc1 flag
// pair bound to LangOpts "OffloadFP32PrecDiv"; the option defaults to true.
defm offload_fp32_prec_div: BoolOption<"f", "offload-fp32-prec-div",
LangOpts<"OffloadFP32PrecDiv">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "fdiv operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

// -foffload-fp32-prec-sqrt / -fno-offload-fp32-prec-sqrt: driver and cc1 flag
// pair bound to LangOpts "OffloadFP32PrecSqrt"; the option defaults to true.
defm offload_fp32_prec_sqrt: BoolOption<"f", "offload-fp32-prec-sqrt",
LangOpts<"OffloadFP32PrecSqrt">, DefaultTrue,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are required to return correctly rounded results.">,
NegFlag<SetFalse, [], [ClangOption, CC1Option], "sqrt operations in offload device "
"code are not required to return correctly rounded results.">>,
Group<f_Group>;

// OpenCL-only Options
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
Visibility<[ClangOption, CC1Option]>,
Expand Down
13 changes: 4 additions & 9 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,13 +521,6 @@ static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0,
: CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
}

/// Returns true when a translation-unit-wide FP accuracy value is set, or
/// when \p Name has an entry in the per-function FP accuracy map.
static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) {
  const auto &Opts = CGF.getLangOpts();
  const auto &PerFunctionAccuracy = Opts.FPAccuracyFuncMap;
  // The map is only consulted when no TU-wide accuracy value is present.
  return !Opts.FPAccuracyVal.empty() ||
         PerFunctionAccuracy.find(Name.str()) != PerFunctionAccuracy.end();
}

static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
unsigned FPAccuracyIntrinsicID,
unsigned IntrinsicID, llvm::Value *Src0,
Expand All @@ -546,7 +539,7 @@ static Function *emitMaybeIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID());
// Use fpbuiltin intrinsic only when needed.
Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID,
hasAccuracyRequirement(CGF, Name));
CGF.hasAccuracyRequirement(Name));
}
}
}
Expand Down Expand Up @@ -24099,6 +24092,7 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
.Case("sincos", llvm::Intrinsic::fpbuiltin_sincos)
.Case("exp10", llvm::Intrinsic::fpbuiltin_exp10)
.Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt)
.Case("sqrt", llvm::Intrinsic::fpbuiltin_sqrt)
.Default(0);
} else {
// The function has a clang builtin. Create an attribute for it
Expand Down Expand Up @@ -24200,7 +24194,8 @@ llvm::CallInst *CodeGenFunction::MaybeEmitFPBuiltinofFD(
// a TU fp-accuracy requested.
const LangOptions &LangOpts = getLangOpts();
if (hasFuncNameRequestedFPAccuracy(Name, LangOpts) ||
!LangOpts.FPAccuracyVal.empty()) {
!LangOpts.FPAccuracyVal.empty() || !LangOpts.OffloadFP32PrecDiv ||
!LangOpts.OffloadFP32PrecSqrt) {
llvm::Function *Func =
CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType());
return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs),
Expand Down
47 changes: 38 additions & 9 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1879,25 +1879,44 @@ void CodeGenModule::getDefaultFunctionFPAccuracyAttributes(
// the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs
// is empty), then set its accuracy from the TU's accuracy value.
if (!getLangOpts().FPAccuracyFuncMap.empty()) {
StringRef FPAccuracyVal;
auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str());
if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
FPAccuracyVal = "2.5";
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
FPAccuracyVal = "3.0";
else
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(FuncMapIt->second));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(FuncMapIt->second)));
}
}
if (FuncAttrs.attrs().size() == 0)
if (FuncAttrs.attrs().size() == 0) {
if (!getLangOpts().FPAccuracyVal.empty()) {
StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
StringRef FPAccuracyVal;
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv")
FPAccuracyVal = "2.5";
else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt")
FPAccuracyVal = "3.0";
else
FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin(
ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal));
assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected");
FuncAttrs.addAttribute("fpbuiltin-max-error", FPAccuracyVal);
MD = llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, convertFPAccuracyToAspect(getLangOpts().FPAccuracyVal)));
} else {
if (!getLangOpts().OffloadFP32PrecDiv && Name == "fdiv") {
FuncAttrs.addAttribute("fpbuiltin-max-error", "2.5");
} else if (!getLangOpts().OffloadFP32PrecSqrt && Name == "sqrt") {
FuncAttrs.addAttribute("fpbuiltin-max-error", "3.0");
}
}
}
}

/// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the
Expand Down Expand Up @@ -5790,10 +5809,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Emit the actual call/invoke instruction.
llvm::CallBase *CI;
if (!InvokeDest) {
if (!getLangOpts().FPAccuracyFuncMap.empty() ||
!getLangOpts().FPAccuracyVal.empty()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
const auto *FD = dyn_cast_if_present<FunctionDecl>(TargetDecl);
if (FD && FD->getNameInfo().getName().isIdentifier()) {
StringRef FuncName = FD->getName();
const bool IsFloat32Type = FD->getReturnType()->isFloat32Type();
bool hasFPAccuracyFuncMap = hasAccuracyRequirement(FuncName);
bool hasFPAccuracyVal = !getLangOpts().FPAccuracyVal.empty();
bool isFp32SqrtFunction =
(FuncName == "sqrt" && !getLangOpts().OffloadFP32PrecSqrt &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we compare with un-mangled sqrt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FuncName is the output of FD->getName() which returns a simple identifier. https://github.com/intel/llvm/blob/sycl/clang/include/clang/AST/Decl.h#L280

Copy link
Contributor

@MrSidims MrSidims Nov 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So clang/test/CodeGenSYCL/offload-fp32-div-sqrt.cpp will pass even with extern "C" removed from sqrt function declaration?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the user has a function in their own namespace that happens to be named "sqrt"?

IsFloat32Type);
bool isFP32FdivFunction =
(FuncName == "fdiv" && !getLangOpts().OffloadFP32PrecDiv &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually thought that the request was to replace the fdiv instruction with the intrinsic, not an fdiv function. Do we know if users actually use such a function? I don't see any mention of it in the SYCL or OpenCL specifications.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gmlueck could you please comment on that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intent of -foffload-fp32-prec-div is to affect the native divide operation (i.e. /). There is no SYCL function named fdiv. Is there a standard C / C++ function with that name?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK there is no standard function for floating-point division. There is std::div, but it works only on integers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no C/C++ fdiv function.

IsFloat32Type);
if (hasFPAccuracyFuncMap || hasFPAccuracyVal || isFp32SqrtFunction ||
isFP32FdivFunction) {
CI = MaybeEmitFPBuiltinofFD(IRFuncTy, IRCallArgs, CalleePtr,
FD->getName(), FD->getBuiltinID());
if (CI)
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
}
}

/// Reports whether \p Name is subject to an FP accuracy request: either a
/// translation-unit-wide accuracy value is set, or the per-function accuracy
/// map contains an entry for \p Name.
bool CodeGenFunction::hasAccuracyRequirement(StringRef Name) {
  const auto &LO = getLangOpts();
  if (LO.FPAccuracyVal.empty()) {
    // No TU-wide value: fall back to the per-function map.
    const auto &FuncMap = LO.FPAccuracyFuncMap;
    return FuncMap.find(Name.str()) != FuncMap.end();
  }
  return true;
}

void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
llvm::FastMathFlags FMF;
FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -5213,6 +5213,8 @@ class CodeGenFunction : public CodeGenTypeCache {
/// CodeGenOpts.
void SetDivFPAccuracy(llvm::Value *Val);

bool hasAccuracyRequirement(StringRef Name);

/// Set the codegen fast-math flags.
void SetFastMathFlags(FPOptions FPFeatures);

Expand Down
122 changes: 116 additions & 6 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2950,10 +2950,32 @@ RenderComplexRangeOption(LangOptions::ComplexRangeKind Range) {
return ComplexRangeStr;
}

/// Emits the "accuracy conflicts with explicit offload fp32 precision option"
/// warning, streaming \p AccuracValStr and \p TargetPrecStr as its two
/// arguments. Nothing is emitted unless \p JA is a SYCL device offloading job.
static void EmitAccuracyDiag(const Driver &D, const JobAction &JA,
                             StringRef AccuracValStr, StringRef TargetPrecStr) {
  if (!JA.isDeviceOffloading(Action::OFK_SYCL))
    return;
  D.Diag(clang::diag::
             warn_acuracy_conflicts_with_explicit_offload_fp32_prec_option)
      << AccuracValStr << TargetPrecStr;
}

/// Splits \p Val on ':' and returns the comma-separated entries of the second
/// field. The result is empty when \p Val contains no ':'.
static SmallVector<StringRef, 8> SplitFPAccuracyVal(StringRef Val) {
  SmallVector<StringRef, 8> Names;
  SmallVector<StringRef, 8> ColonFields;
  Val.split(ColonFields, ":");
  // Without a second ':'-separated field there is nothing to list.
  if (ColonFields.size() <= 1)
    return Names;
  ColonFields[1].split(Names, ",");
  return Names;
}

static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
bool OFastEnabled, const ArgList &Args,
ArgStringList &CmdArgs,
const JobAction &JA) {
const JobAction &JA,
bool &NoOffloadFP32PrecDiv,
bool &NoOffloadFP32PrecSqrt) {
// Handle various floating point optimization flags, mapping them to the
// appropriate LLVM code generation flags. This is complicated by several
// "umbrella" flags, so we do this by stepping through the flags incrementally
Expand Down Expand Up @@ -2998,6 +3020,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None;
std::string ComplexRangeStr = "";
std::string GccRangeComplexOption = "";
bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL) &&
!JA.isDeviceOffloading(Action::OFK_Cuda) &&
!JA.isOffloading(Action::OFK_HIP);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed offline, something like:

Suggested change
bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL) &&
!JA.isDeviceOffloading(Action::OFK_Cuda) &&
!JA.isOffloading(Action::OFK_HIP);
bool IsFp32PrecDivSqrtAllowed = JA.isDeviceOffloading(Action::OFK_SYCL) &&
TC.getTriple().isSPIROrSPIRV();


// Lambda to set fast-math options. This is also used by -ffp-model=fast
auto applyFastMath = [&]() {
Expand Down Expand Up @@ -3027,6 +3052,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
: ComplexArithmeticStr(LangOptions::ComplexRangeKind::CX_Basic));
Range = LangOptions::ComplexRangeKind::CX_Basic;
SeenUnsafeMathModeOption = true;
if (IsFp32PrecDivSqrtAllowed) {
// when fp-model=fast is used the default precision for division and
// sqrt is not precise.
NoOffloadFP32PrecDiv = true;
NoOffloadFP32PrecSqrt = true;
}
};

// Lambda to consolidate common handling for fp-contract
Expand Down Expand Up @@ -3055,11 +3086,48 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
CmdArgs.push_back(A->getValue());
}

// Records the effect of one -f[no-]offload-fp32-prec-{div,sqrt} spelling on
// the NoOffloadFP32PrecDiv / NoOffloadFP32PrecSqrt flags. Does nothing when
// IsFp32PrecDivSqrtAllowed is false. If an -ffp-accuracy value has already
// been seen, the conflict diagnostic is requested first.
auto addSPIRVArgs = [&](StringRef SPIRVArg) {
  if (!IsFp32PrecDivSqrtAllowed)
    return;
  if (!FPAccuracy.empty())
    EmitAccuracyDiag(D, JA, FPAccuracy, SPIRVArg);
  // The four spellings are mutually exclusive; any other string leaves both
  // flags untouched.
  if (SPIRVArg == "-foffload-fp32-prec-div")
    NoOffloadFP32PrecDiv = false;
  else if (SPIRVArg == "-foffload-fp32-prec-sqrt")
    NoOffloadFP32PrecSqrt = false;
  else if (SPIRVArg == "-fno-offload-fp32-prec-div")
    NoOffloadFP32PrecDiv = true;
  else if (SPIRVArg == "-fno-offload-fp32-prec-sqrt")
    NoOffloadFP32PrecSqrt = true;
};

auto parseFPAccOption = [&](StringRef Val, bool &NoOffloadFlag) {
SmallVector<StringRef, 8> FuncsArr = SplitFPAccuracyVal(Val);
for (const auto &V : FuncsArr) {
if (V == "fdiv")
NoOffloadFlag = false;
else if (V == "sqrt")
NoOffloadFlag = false;
}
};

for (const Arg *A : Args) {
switch (A->getOption().getID()) {
// If this isn't an FP option skip the claim below
default: continue;

case options::OPT_foffload_fp32_prec_div:
addSPIRVArgs("-foffload-fp32-prec-div");
break;
case options::OPT_foffload_fp32_prec_sqrt:
addSPIRVArgs("-foffload-fp32-prec-sqrt");
break;
case options::OPT_fno_offload_fp32_prec_div:
addSPIRVArgs("-fno-offload-fp32-prec-div");
break;
case options::OPT_fno_offload_fp32_prec_sqrt:
addSPIRVArgs("-fno-offload-fp32-prec-sqrt");
break;
case options::OPT_fcx_limited_range:
if (GccRangeComplexOption.empty()) {
if (Range != LangOptions::ComplexRangeKind::CX_Basic)
Expand Down Expand Up @@ -3144,6 +3212,14 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
case options::OPT_ffp_accuracy_EQ: {
StringRef Val = A->getValue();
FPAccuracy = Val;
if (NoOffloadFP32PrecDiv) {
EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-offload-fp32-prec-div");
parseFPAccOption(Val, NoOffloadFP32PrecDiv);
}
if (NoOffloadFP32PrecSqrt) {
EmitAccuracyDiag(D, JA, FPAccuracy, "-fno-offload-fp32-prec-sqrt");
parseFPAccOption(Val, NoOffloadFP32PrecSqrt);
}
break;
}
case options::OPT_ffp_model_EQ: {
Expand Down Expand Up @@ -3557,6 +3633,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
CmdArgs.push_back("-fno-cx-limited-range");
if (Args.hasArg(options::OPT_fno_cx_fortran_rules))
CmdArgs.push_back("-fno-cx-fortran-rules");
if (IsFp32PrecDivSqrtAllowed) {
if (NoOffloadFP32PrecDiv)
CmdArgs.push_back("-fno-offload-fp32-prec-div");
if (NoOffloadFP32PrecSqrt)
CmdArgs.push_back("-fno-offload-fp32-prec-sqrt");
}
}

static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs,
Expand Down Expand Up @@ -5311,6 +5393,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
const Driver &D = TC.getDriver();
ArgStringList CmdArgs;
bool NoOffloadFP32PrecDiv = false;
bool NoOffloadFP32PrecSqrt = false;

assert(Inputs.size() >= 1 && "Must have at least one input.");
// CUDA/HIP compilation may have multiple inputs (source file + results of
Expand Down Expand Up @@ -6119,7 +6203,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_optimize_sibling_calls);

RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
CmdArgs, JA);
CmdArgs, JA, NoOffloadFP32PrecDiv,
NoOffloadFP32PrecSqrt);

// Render ABI arguments
switch (TC.getArch()) {
Expand Down Expand Up @@ -6593,7 +6678,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_protect_parens, false))
CmdArgs.push_back("-fprotect-parens");

RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA,
NoOffloadFP32PrecDiv, NoOffloadFP32PrecSqrt);

if (Arg *A = Args.getLastArg(options::OPT_fextend_args_EQ)) {
const llvm::Triple::ArchType Arch = TC.getArch();
Expand Down Expand Up @@ -6644,8 +6730,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
FpAccuracyAttr += OptStr.str();
}
};
for (StringRef A : Args.getAllArgValues(options::OPT_ffp_accuracy_EQ))
RenderFPAccuracyOptions(A);
auto shouldAddFpAccuracyOption = [&](StringRef Val, StringRef Func) {
SmallVector<StringRef, 8> FuncsArr = SplitFPAccuracyVal(Val);
for (const auto &V : FuncsArr)
return (V == Func);
return false;
};

for (StringRef A : Args.getAllArgValues(options::OPT_ffp_accuracy_EQ)) {
if (!(NoOffloadFP32PrecDiv && shouldAddFpAccuracyOption(A, "fdiv")) &&
!(NoOffloadFP32PrecSqrt && shouldAddFpAccuracyOption(A, "sqrt")))
RenderFPAccuracyOptions(A);
}
if (!FpAccuracyAttr.empty())
CmdArgs.push_back(Args.MakeArgString(FpAccuracyAttr));

Expand Down Expand Up @@ -10603,8 +10699,22 @@ static void getTripleBasedSPIRVTransOpts(Compilation &C,
",+SPV_KHR_non_semantic_info"
",+SPV_KHR_cooperative_matrix"
",+SPV_EXT_shader_atomic_float16_add";
if (IsCPU)
// True when neither -foffload-fp32-prec-sqrt nor -foffload-fp32-prec-div is
// the winning flag of its positive/negative pair (hasFlag defaults to false
// when neither spelling is present). The div pair is only queried when the
// sqrt query returned false.
auto hasNoOffloadFP32PrecOption = [](const llvm::opt::ArgList &TCArgs) {
  return !(TCArgs.hasFlag(options::OPT_foffload_fp32_prec_sqrt,
                          options::OPT_fno_offload_fp32_prec_sqrt, false) ||
           TCArgs.hasFlag(options::OPT_foffload_fp32_prec_div,
                          options::OPT_fno_offload_fp32_prec_div, false));
};
// True when -fno-offload-fp32-prec-sqrt or -fno-offload-fp32-prec-div is the
// winning flag of its pair (hasFlag defaults to false when neither spelling
// is present). The div pair is only queried when the sqrt query returned
// false.
auto shouldUseOffloadFP32PrecOption = [](const llvm::opt::ArgList &TCArgs) {
  if (TCArgs.hasFlag(options::OPT_fno_offload_fp32_prec_sqrt,
                     options::OPT_foffload_fp32_prec_sqrt, false))
    return true;
  return TCArgs.hasFlag(options::OPT_fno_offload_fp32_prec_div,
                        options::OPT_foffload_fp32_prec_div, false);
};
if ((IsCPU && hasNoOffloadFP32PrecOption(TCArgs)) ||
shouldUseOffloadFP32PrecOption(TCArgs)) {
ExtArg += ",+SPV_INTEL_fp_max_error";
}

TranslatorArgs.push_back(TCArgs.MakeArgString(ExtArg));
}
Expand Down
Loading
Loading