Skip to content

Commit

Permalink
Auto merge of rust-lang#130325 - workingjubilee:plus-minus-zero-redux…
Browse files Browse the repository at this point in the history
…, r=<try>

Use -0.0 in `intrinsics::simd::reduce_add_unordered`

-0.0 is the true additive identity for floats, not +0.0, and this matters to codegen.

try-job: aarch64-gnu
  • Loading branch information
bors committed Sep 14, 2024
2 parents 23b04c0 + 3e350fb commit 5e4b391
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2090,14 +2090,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
};
}

arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, 0.0);
arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
arith_red!(
simd_reduce_add_unordered: vector_reduce_add,
vector_reduce_fadd_reassoc,
false,
add,
0.0
-0.0
);
arith_red!(
simd_reduce_mul_unordered: vector_reduce_mul,
Expand Down
38 changes: 38 additions & 0 deletions tests/assembly/simd/reduce-fadd-unordered.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//@ revisions: x86_64 x86_64-avx2 aarch64
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -O
//@[aarch64] only-aarch64
//@[x86_64] only-x86_64
//@[x86_64-avx2] only-x86_64
//@[x86_64-avx2] compile-flags: -Ctarget-cpu=x86-64-v3
#![feature(portable_simd)]
#![feature(core_intrinsics)]
use std::intrinsics::simd as intrinsics;
use std::simd::*;

// Regression test for https://github.com/rust-lang/rust/issues/130028
// This intrinsic produces much worse code if you use +0.0 instead of -0.0 because
// +0.0 isn't as easy to algebraically reassociate, even using LLVM's reassoc attribute!
// It would emit roughly one extra fadd, depending on the architecture.


// CHECK-LABEL: reduce_fadd_negative_zero
// Reduces the vector `v` to a single `f32` by forwarding it to the
// `simd_reduce_add_unordered` intrinsic and returning the intrinsic's result.
//
// The function exists so that the emitted assembly can be inspected: the
// comment directives inside the body pin the instruction sequence expected for
// each test revision declared in the file header.
//
// NOTE(review): presumably marked `unsafe` because the intrinsic is unsafe to
// call; confirm against the intrinsic's declaration.
pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
// Baseline x86-64 revision: a packed add, a move, a shuffle and a scalar add on
// consecutive lines, with no `xorps` after them.
// NOTE(review): `xorps` is presumably how a +0.0 start value would be
// materialized; confirm against the linked issue.
// x86_64: addps
// x86_64-NEXT: movaps
// x86_64-NEXT: shufps
// x86_64-NEXT: addss
// x86_64-NOT: xorps

// AVX2 revision (built with `-Ctarget-cpu=x86-64-v3`): the VEX-encoded
// equivalent of the sequence above, likewise with no `vxorps` after it.
// x86_64-avx2: vaddps
// x86_64-avx2-NEXT: vmovshdup
// x86_64-avx2-NEXT: vaddss
// x86_64-avx2-NOT: vxorps

// AArch64 revision: two pairwise adds on consecutive lines.
// aarch64: faddp
// aarch64-NEXT: faddp

// All revisions: no further add-style instruction may appear before the return.
// CHECK-NOT: {{f?}}add{{p?s*}}
// CHECK: ret
intrinsics::simd_reduce_add_unordered(v)
}

0 comments on commit 5e4b391

Please sign in to comment.