-
Notifications
You must be signed in to change notification settings - Fork 13k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of #124874 - jedbrown:float-mul-add-fast, r=saethlin
intrinsics fmuladdf{32,64}: expose llvm.fmuladd.* semantics

Add intrinsics `fmuladd{f32,f64}`. Each computes `(a * b) + c`, to be fused if the code generator determines that (i) the target instruction set has support for a fused operation, and (ii) the fused operation is more efficient than the equivalent, separate pair of `mul` and `add` instructions. See https://llvm.org/docs/LangRef.html#llvm-fmuladd-intrinsic

The codegen_cranelift backend uses the `fma` function from libc, which is a correct implementation, but without the desired performance semantics. I think this requires an update to cranelift to expose a suitable instruction in its IR. I have not tested with codegen_gcc, but it should behave the same way (using `fma` from libc).

---

This topic has been discussed a few times on Zulip and was suggested, for example, by `@workingjubilee` in [Effect of fma disabled](https://rust-lang.zulipchat.com/#narrow/stream/122651-general/topic/Effect.20of.20fma.20disabled/near/274179331).
- Loading branch information
Showing
11 changed files
with
196 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
src/tools/miri/tests/pass/intrinsics/fmuladd_nondeterministic.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#![feature(core_intrinsics)] | ||
use std::intrinsics::{fmuladdf32, fmuladdf64}; | ||
|
||
fn main() { | ||
let mut saw_zero = false; | ||
let mut saw_nonzero = false; | ||
for _ in 0..50 { | ||
let a = std::hint::black_box(0.1_f64); | ||
let b = std::hint::black_box(0.2); | ||
let c = std::hint::black_box(-a * b); | ||
// It is unspecified whether the following operation is fused or not. The | ||
// following evaluates to 0.0 if unfused, and nonzero (-1.66e-18) if fused. | ||
let x = unsafe { fmuladdf64(a, b, c) }; | ||
if x == 0.0 { | ||
saw_zero = true; | ||
} else { | ||
saw_nonzero = true; | ||
} | ||
} | ||
assert!( | ||
saw_zero && saw_nonzero, | ||
"`fmuladdf64` failed to be evaluated as both fused and unfused" | ||
); | ||
|
||
let mut saw_zero = false; | ||
let mut saw_nonzero = false; | ||
for _ in 0..50 { | ||
let a = std::hint::black_box(0.1_f32); | ||
let b = std::hint::black_box(0.2); | ||
let c = std::hint::black_box(-a * b); | ||
// It is unspecified whether the following operation is fused or not. The | ||
// following evaluates to 0.0 if unfused, and nonzero (-8.1956386e-10) if fused. | ||
let x = unsafe { fmuladdf32(a, b, c) }; | ||
if x == 0.0 { | ||
saw_zero = true; | ||
} else { | ||
saw_nonzero = true; | ||
} | ||
} | ||
assert!( | ||
saw_zero && saw_nonzero, | ||
"`fmuladdf32` failed to be evaluated as both fused and unfused" | ||
); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
//@ run-pass | ||
#![feature(core_intrinsics)] | ||
|
||
use std::intrinsics::*; | ||
|
||
/// Asserts that two floating-point expressions differ by less than `1.0e-6`.
///
/// Each operand is evaluated exactly once. On failure the assertion panics with
/// a message showing both values.
macro_rules! assert_approx_eq {
    ($a:expr, $b:expr) => {{
        // Bind by reference so each operand expression is evaluated only once.
        let (a, b) = (&$a, &$b);
        assert!((*a - *b).abs() < 1.0e-6, "{} is not approximately equal to {}", *a, *b);
    }};
}
|
||
fn main() { | ||
unsafe { | ||
let nan: f32 = f32::NAN; | ||
let inf: f32 = f32::INFINITY; | ||
let neg_inf: f32 = f32::NEG_INFINITY; | ||
assert_approx_eq!(fmuladdf32(12.3, 4.5, 6.7), 62.05); | ||
assert_approx_eq!(fmuladdf32(-12.3, -4.5, -6.7), 48.65); | ||
assert_approx_eq!(fmuladdf32(0.0, 8.9, 1.2), 1.2); | ||
assert_approx_eq!(fmuladdf32(3.4, -0.0, 5.6), 5.6); | ||
assert!(fmuladdf32(nan, 7.8, 9.0).is_nan()); | ||
assert_eq!(fmuladdf32(inf, 7.8, 9.0), inf); | ||
assert_eq!(fmuladdf32(neg_inf, 7.8, 9.0), neg_inf); | ||
assert_eq!(fmuladdf32(8.9, inf, 3.2), inf); | ||
assert_eq!(fmuladdf32(-3.2, 2.4, neg_inf), neg_inf); | ||
} | ||
unsafe { | ||
let nan: f64 = f64::NAN; | ||
let inf: f64 = f64::INFINITY; | ||
let neg_inf: f64 = f64::NEG_INFINITY; | ||
assert_approx_eq!(fmuladdf64(12.3, 4.5, 6.7), 62.05); | ||
assert_approx_eq!(fmuladdf64(-12.3, -4.5, -6.7), 48.65); | ||
assert_approx_eq!(fmuladdf64(0.0, 8.9, 1.2), 1.2); | ||
assert_approx_eq!(fmuladdf64(3.4, -0.0, 5.6), 5.6); | ||
assert!(fmuladdf64(nan, 7.8, 9.0).is_nan()); | ||
assert_eq!(fmuladdf64(inf, 7.8, 9.0), inf); | ||
assert_eq!(fmuladdf64(neg_inf, 7.8, 9.0), neg_inf); | ||
assert_eq!(fmuladdf64(8.9, inf, 3.2), inf); | ||
assert_eq!(fmuladdf64(-3.2, 2.4, neg_inf), neg_inf); | ||
} | ||
} |