From 377730efe4ab8c4bca6c4d6058bcd413c9e67dfa Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 28 Sep 2023 16:34:07 +0200 Subject: [PATCH 01/11] update ui_test --- src/tools/miri/Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index d236e4bbcb401..5628d0697ed99 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -947,9 +947,9 @@ dependencies = [ [[package]] name = "ui_test" -version = "0.21.1" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accffe020b57a6dd50014d457b5842c5a2ca73cd84f07d86d0a19c460a6509ae" +checksum = "aaf4bf7c184b8dfc7a4d3b90df789b1eb992ee42811cd115f32a7a1eb781058d" dependencies = [ "annotate-snippets", "anyhow", From 2f7fdd1ee9f0473ef893fe79329429406f193893 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 29 Sep 2023 07:25:22 +0200 Subject: [PATCH 02/11] add tests for '%' sign on floats --- src/tools/miri/tests/pass/float.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/tools/miri/tests/pass/float.rs b/src/tools/miri/tests/pass/float.rs index fee5ca44ffb34..70c64485fe937 100644 --- a/src/tools/miri/tests/pass/float.rs +++ b/src/tools/miri/tests/pass/float.rs @@ -168,6 +168,16 @@ fn basic() { let x: u32 = unsafe { std::mem::transmute(42.0_f32) }; let y: f32 = unsafe { std::mem::transmute(x) }; assert_eq(y, 42.0_f32); + + // `%` sign behavior, some of this used to be buggy + assert!((black_box(1.0f32) % 1.0).is_sign_positive()); + assert!((black_box(1.0f32) % -1.0).is_sign_positive()); + assert!((black_box(-1.0f32) % 1.0).is_sign_negative()); + assert!((black_box(-1.0f32) % -1.0).is_sign_negative()); + assert!((black_box(1.0f64) % 1.0).is_sign_positive()); + assert!((black_box(1.0f64) % -1.0).is_sign_positive()); + assert!((black_box(-1.0f64) % 1.0).is_sign_negative()); + assert!((black_box(-1.0f64) % -1.0).is_sign_negative()); } /// Many of these test values are taken from From 71030bae75307ea174cbf31a46888b584236476b Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 29 Sep 2023 07:31:52 +0200 Subject: [PATCH 03/11] add FMA test that would fail with apfloat --- src/tools/miri/src/shims/intrinsics/mod.rs | 4 +-- src/tools/miri/tests/pass/intrinsics-math.rs | 27 +++++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/tools/miri/src/shims/intrinsics/mod.rs b/src/tools/miri/src/shims/intrinsics/mod.rs index d54145dbdc7f6..a9cf4bb0e23eb 100644 --- a/src/tools/miri/src/shims/intrinsics/mod.rs +++ b/src/tools/miri/src/shims/intrinsics/mod.rs @@ -324,7 +324,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { "fmaf32" => { let [a, b, c] = check_arg_count(args)?; - // FIXME: Using host floats, to work around https://github.com/rust-lang/miri/issues/2468. + // FIXME: Using host floats, to work around https://github.com/rust-lang/rustc_apfloat/issues/11 let a = f32::from_bits(this.read_scalar(a)?.to_u32()?); let b = f32::from_bits(this.read_scalar(b)?.to_u32()?); let c = f32::from_bits(this.read_scalar(c)?.to_u32()?); @@ -334,7 +334,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { "fmaf64" => { let [a, b, c] = check_arg_count(args)?; - // FIXME: Using host floats, to work around https://github.com/rust-lang/miri/issues/2468. + // FIXME: Using host floats, to work around https://github.com/rust-lang/rustc_apfloat/issues/11 let a = f64::from_bits(this.read_scalar(a)?.to_u64()?); let b = f64::from_bits(this.read_scalar(b)?.to_u64()?); let c = f64::from_bits(this.read_scalar(c)?.to_u64()?); diff --git a/src/tools/miri/tests/pass/intrinsics-math.rs b/src/tools/miri/tests/pass/intrinsics-math.rs index e0e4f5654d6a4..5f7730a3e86af 100644 --- a/src/tools/miri/tests/pass/intrinsics-math.rs +++ b/src/tools/miri/tests/pass/intrinsics-math.rs @@ -1,4 +1,5 @@ #![feature(float_gamma)] +use std::{f32, f64}; macro_rules! assert_approx_eq { ($a:expr, $b:expr) => {{ @@ -15,8 +16,7 @@ fn ldexp(a: f64, b: i32) -> f64 { } pub fn main() { - use std::f32; - use std::f64; + mul_add(); assert_approx_eq!(64f32.sqrt(), 8f32); assert_approx_eq!(64f64.sqrt(), 8f64); @@ -48,13 +48,6 @@ pub fn main() { assert_approx_eq!(8f32.log2(), 3f32); assert_approx_eq!(f64::consts::E.log2(), f64::consts::LOG2_E); - assert_approx_eq!(3.0f32.mul_add(2.0f32, 5.0f32), 11.0); - assert_eq!(0.0f32.mul_add(-2.0, f32::consts::E), f32::consts::E); - assert_approx_eq!(3.0f64.mul_add(2.0, 5.0), 11.0); - assert_eq!(0.0f64.mul_add(-2.0f64, f64::consts::E), f64::consts::E); - assert_eq!((-3.2f32).mul_add(2.4, f32::NEG_INFINITY), f32::NEG_INFINITY); - assert_eq!((-3.2f64).mul_add(2.4, f64::NEG_INFINITY), f64::NEG_INFINITY); - assert_approx_eq!((-1.0f32).abs(), 1.0f32); assert_approx_eq!(34.2f64.abs(), 34.2f64); @@ -146,3 +139,19 @@ pub fn main() { assert_approx_eq!(val, (2.0 * f64::consts::PI.sqrt()).ln()); assert_eq!(sign, -1); } + +fn mul_add() { + assert_approx_eq!(3.0f32.mul_add(2.0f32, 5.0f32), 11.0); + assert_eq!(0.0f32.mul_add(-2.0, f32::consts::E), f32::consts::E); + assert_approx_eq!(3.0f64.mul_add(2.0, 5.0), 11.0); + assert_eq!(0.0f64.mul_add(-2.0f64, f64::consts::E), f64::consts::E); + assert_eq!((-3.2f32).mul_add(2.4, f32::NEG_INFINITY), f32::NEG_INFINITY); + assert_eq!((-3.2f64).mul_add(2.4, f64::NEG_INFINITY), f64::NEG_INFINITY); + + let f = f32::mul_add( + -0.000000000000000000000000000000000000014728589, + 0.0000037105144, + 0.000000000000000000000000000000000000000000055, + ); + assert_eq!(f.to_bits(), f32::to_bits(-0.0)); +} From 0c0c088e57c2dbb7a4145bf2bee1ca1cc137bf50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Thu, 28 Sep 2023 18:13:59 +0200 Subject: [PATCH 04/11] Implement the `llvm.x86.sse2.pmadd.wd` intrinsic --- src/tools/miri/src/shims/x86/sse2.rs | 36 +++++++++++++++++++ .../miri/tests/pass/intrinsics-x86-sse2.rs | 18 ++++++++++ 2 files changed, 54 insertions(+) diff --git a/src/tools/miri/src/shims/x86/sse2.rs b/src/tools/miri/src/shims/x86/sse2.rs index 2ca882167bf4b..2ef6a9b59ede7 100644 --- a/src/tools/miri/src/shims/x86/sse2.rs +++ b/src/tools/miri/src/shims/x86/sse2.rs @@ -82,6 +82,42 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this.write_immediate(*res, &dest)?; } } + // Used to implement the _mm_madd_epi16 function. + // Multiplies packed signed 16-bit integers in `left` and `right`, producing + // intermediate signed 32-bit integers. Horizontally add adjacent pairs of + // intermediate 32-bit integers, and pack the results in `dest`. + "pmadd.wd" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(left_len, right_len); + assert_eq!(dest_len.checked_mul(2).unwrap(), left_len); + + for i in 0..dest_len { + let j1 = i.checked_mul(2).unwrap(); + let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?; + let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?; + + let j2 = j1.checked_add(1).unwrap(); + let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?; + let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?; + + let dest = this.project_index(&dest, i)?; + + // Multiplications are i16*i16->i32, which will not overflow. + let mul1 = i32::from(left1).checked_mul(right1.into()).unwrap(); + let mul2 = i32::from(left2).checked_mul(right2.into()).unwrap(); + // However, this addition can overflow in the most extreme case + // (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000 + let res = mul1.wrapping_add(mul2); + + this.write_scalar(Scalar::from_i32(res), &dest)?; + } + } // Used to implement the _mm_mulhi_epi16 and _mm_mulhi_epu16 functions. "pmulh.w" | "pmulhu.w" => { let [left, right] = diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs index fa9df04d36843..2c7665bc73631 100644 --- a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs +++ b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs @@ -70,6 +70,24 @@ mod tests { } test_mm_avg_epu16(); + #[target_feature(enable = "sse2")] + unsafe fn test_mm_madd_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm_madd_epi16(a, b); + let e = _mm_setr_epi32(29, 81, 149, 233); + assert_eq_m128i(r, e); + + let a = + _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MIN, i16::MAX, 0, 0); + let b = + _mm_setr_epi16(i16::MAX, i16::MAX, i16::MIN, i16::MIN, i16::MAX, i16::MIN, 0, 0); + let r = _mm_madd_epi16(a, b); + let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0); + assert_eq_m128i(r, e); + } + test_mm_madd_epi16(); + #[target_feature(enable = "sse2")] unsafe fn test_mm_mulhi_epi16() { let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); From 4b2b0eaff239c630242bd44552fba3125b1f59c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 26 Sep 2023 19:45:06 +0200 Subject: [PATCH 05/11] Implement `llvm.x86.sse3.*` intrinsics --- src/tools/miri/src/shims/x86/mod.rs | 6 + src/tools/miri/src/shims/x86/sse3.rs | 119 ++++++++++++++++++ .../miri/tests/pass/intrinsics-x86-sse3.rs | 114 +++++++++++++++++ 3 files changed, 239 insertions(+) create mode 100644 src/tools/miri/src/shims/x86/sse3.rs create mode 100644 src/tools/miri/tests/pass/intrinsics-x86-sse3.rs diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index fbfe00e03dbcb..8b3805ae55fbb 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -9,6 +9,7 @@ use shims::foreign_items::EmulateByNameResult; mod sse; mod sse2; +mod sse3; impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: @@ -88,6 +89,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this, link_name, abi, args, dest, ); } + name if name.starts_with("sse3.") => { + return sse3::EvalContextExt::emulate_x86_sse3_intrinsic( + this, link_name, abi, args, dest, + ); + } _ => return Ok(EmulateByNameResult::NotSupported), } Ok(EmulateByNameResult::NeedsJumping) diff --git a/src/tools/miri/src/shims/x86/sse3.rs b/src/tools/miri/src/shims/x86/sse3.rs new file mode 100644 index 0000000000000..309d3e186b13e --- /dev/null +++ b/src/tools/miri/src/shims/x86/sse3.rs @@ -0,0 +1,119 @@ +use rustc_middle::mir; +use rustc_span::Symbol; +use rustc_target::abi::Align; +use rustc_target::spec::abi::Abi; + +use crate::*; +use shims::foreign_items::EmulateByNameResult; + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ + fn emulate_x86_sse3_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx, Provenance>], + dest: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> { + let this = self.eval_context_mut(); + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap(); + + match unprefixed_name { + // Used to implement the _mm_addsub_ps and _mm_addsub_pd functions. + // Alternatingly add and subtract floating point (f32 or f64) from + // `left` and `right` + "addsub.ps" | "addsub.pd" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + + for i in 0..dest_len { + let left = this.read_immediate(&this.project_index(&left, i)?)?; + let right = this.read_immediate(&this.project_index(&right, i)?)?; + let dest = this.project_index(&dest, i)?; + + // Even elements are subtracted and odd elements are added. + let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add }; + let res = this.wrapping_binary_op(op, &left, &right)?; + + this.write_immediate(*res, &dest)?; + } + } + // Used to implement the _mm_h{add,sub}_p{s,d} functions. + // Horizontally add/subtract adjacent floating point values + // in `left` and `right`. + "hadd.ps" | "hadd.pd" | "hsub.ps" | "hsub.pd" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + assert_eq!(dest_len % 2, 0); + + let op = match unprefixed_name { + "hadd.ps" | "hadd.pd" => mir::BinOp::Add, + "hsub.ps" | "hsub.pd" => mir::BinOp::Sub, + _ => unreachable!(), + }; + + let middle = dest_len / 2; + for i in 0..dest_len { + let (lhs, rhs) = if i < middle { + let base_i = i.checked_mul(2).unwrap(); + ( + this.read_immediate(&this.project_index(&left, base_i)?)?, + this.read_immediate( + &this.project_index(&left, base_i.checked_add(1).unwrap())?, + )?, + ) + } else { + let base_i = i.checked_sub(middle).unwrap().checked_mul(2).unwrap(); + ( + this.read_immediate(&this.project_index(&right, base_i)?)?, + this.read_immediate( + &this.project_index(&right, base_i.checked_add(1).unwrap())?, + )?, + ) + }; + let (res, _overflow) = this.overflowing_binary_op(op, &lhs, &rhs)?; + + this.write_immediate(*res, &this.project_index(&dest, i)?)?; + } + } + // Used to implement the _mm_lddqu_si128 function. + // Reads a 128-bit vector from an unaligned pointer. This intrinsic + // is expected to perform better than a regular unaligned read when + // the data crosses a cache line, but for Miri this is just a regular + // unaligned read. + "ldu.dq" => { + let [src_ptr] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + let src_ptr = this.read_pointer(src_ptr)?; + let dest = dest.force_mplace(this)?; + + this.mem_copy( + src_ptr, + Align::ONE, + dest.ptr(), + Align::ONE, + dest.layout.size, + /*nonoverlapping*/ true, + )?; + } + _ => return Ok(EmulateByNameResult::NotSupported), + } + Ok(EmulateByNameResult::NeedsJumping) + } +} diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse3.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse3.rs new file mode 100644 index 0000000000000..ea92bf45acbd5 --- /dev/null +++ b/src/tools/miri/tests/pass/intrinsics-x86-sse3.rs @@ -0,0 +1,114 @@ +// Ignore everything except x86 and x86_64 +// Any additional target are added to CI should be ignored here +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+sse3 + +use core::mem::transmute; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +fn main() { + assert!(is_x86_feature_detected!("sse3")); + + unsafe { + test_sse3(); + } +} + +#[target_feature(enable = "sse3")] +unsafe fn test_sse3() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse3.rs + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_addsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_addsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0)); + } + test_mm_addsub_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_addsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_addsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0)); + } + test_mm_addsub_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hadd_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hadd_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0)); + } + test_mm_hadd_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hadd_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_hadd_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0)); + } + test_mm_hadd_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0)); + } + test_mm_hsub_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_hsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0)); + } + test_mm_hsub_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_lddqu_si128() { + let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm_lddqu_si128(&a); + assert_eq_m128i(a, r); + } + test_mm_lddqu_si128(); +} + +#[track_caller] +#[target_feature(enable = "sse")] +unsafe fn assert_eq_m128(a: __m128, b: __m128) { + let r = _mm_cmpeq_ps(a, b); + if _mm_movemask_ps(r) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +} From 2c13713de462f5464d40056940f4e69140208a46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 26 Sep 2023 21:12:45 +0200 Subject: [PATCH 06/11] Implement `llvm.x86.ssse3.*` intrinsics --- src/tools/miri/src/shims/x86/mod.rs | 47 +++ src/tools/miri/src/shims/x86/sse3.rs | 35 +- src/tools/miri/src/shims/x86/ssse3.rs | 199 +++++++++ .../tests/pass/intrinsics-x86-sse3-ssse3.rs | 395 ++++++++++++++++++ .../miri/tests/pass/intrinsics-x86-sse3.rs | 114 ----- 5 files changed, 644 insertions(+), 146 deletions(-) create mode 100644 src/tools/miri/src/shims/x86/ssse3.rs create mode 100644 src/tools/miri/tests/pass/intrinsics-x86-sse3-ssse3.rs delete mode 100644 src/tools/miri/tests/pass/intrinsics-x86-sse3.rs diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 8b3805ae55fbb..7c280109cb011 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -10,6 +10,7 @@ use shims::foreign_items::EmulateByNameResult; mod sse; mod sse2; mod sse3; +mod ssse3; impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: @@ -94,6 +95,11 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: this, link_name, abi, args, dest, ); } + name if name.starts_with("ssse3.") => { + return ssse3::EvalContextExt::emulate_x86_ssse3_intrinsic( + this, link_name, abi, args, dest, + ); + } _ => return Ok(EmulateByNameResult::NotSupported), } Ok(EmulateByNameResult::NeedsJumping) @@ -292,3 +298,44 @@ fn bin_op_simd_float_all<'tcx, F: rustc_apfloat::Float>( Ok(()) } + +/// Horizontaly performs `which` operation on adjacent values of +/// `left` and `right` SIMD vectors and stores the result in `dest`. +fn horizontal_bin_op<'tcx>( + this: &mut crate::MiriInterpCx<'_, 'tcx>, + which: mir::BinOp, + saturating: bool, + left: &OpTy<'tcx, Provenance>, + right: &OpTy<'tcx, Provenance>, + dest: &PlaceTy<'tcx, Provenance>, +) -> InterpResult<'tcx, ()> { + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + assert_eq!(dest_len % 2, 0); + + let middle = dest_len / 2; + for i in 0..dest_len { + // `i` is the index in `dest` + // `j` is the index of the 2-item chunk in `src` + let (j, src) = + if i < middle { (i, &left) } else { (i.checked_sub(middle).unwrap(), &right) }; + // `base_i` is the index of the first item of the 2-item chunk in `src` + let base_i = j.checked_mul(2).unwrap(); + let lhs = this.read_immediate(&this.project_index(src, base_i)?)?; + let rhs = this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?; + + let res = if saturating { + Immediate::from(this.saturating_arith(which, &lhs, &rhs)?) + } else { + *this.wrapping_binary_op(which, &lhs, &rhs)? + }; + + this.write_immediate(res, &this.project_index(&dest, i)?)?; + } + + Ok(()) +} diff --git a/src/tools/miri/src/shims/x86/sse3.rs b/src/tools/miri/src/shims/x86/sse3.rs index 309d3e186b13e..f5c30a521fa8b 100644 --- a/src/tools/miri/src/shims/x86/sse3.rs +++ b/src/tools/miri/src/shims/x86/sse3.rs @@ -3,6 +3,7 @@ use rustc_span::Symbol; use rustc_target::abi::Align; use rustc_target::spec::abi::Abi; +use super::horizontal_bin_op; use crate::*; use shims::foreign_items::EmulateByNameResult; @@ -55,43 +56,13 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; - let (left, left_len) = this.operand_to_simd(left)?; - let (right, right_len) = this.operand_to_simd(right)?; - let (dest, dest_len) = this.place_to_simd(dest)?; - - assert_eq!(dest_len, left_len); - assert_eq!(dest_len, right_len); - assert_eq!(dest_len % 2, 0); - - let op = match unprefixed_name { + let which = match unprefixed_name { "hadd.ps" | "hadd.pd" => mir::BinOp::Add, "hsub.ps" | "hsub.pd" => mir::BinOp::Sub, _ => unreachable!(), }; - let middle = dest_len / 2; - for i in 0..dest_len { - let (lhs, rhs) = if i < middle { - let base_i = i.checked_mul(2).unwrap(); - ( - this.read_immediate(&this.project_index(&left, base_i)?)?, - this.read_immediate( - &this.project_index(&left, base_i.checked_add(1).unwrap())?, - )?, - ) - } else { - let base_i = i.checked_sub(middle).unwrap().checked_mul(2).unwrap(); - ( - this.read_immediate(&this.project_index(&right, base_i)?)?, - this.read_immediate( - &this.project_index(&right, base_i.checked_add(1).unwrap())?, - )?, - ) - }; - let (res, _overflow) = this.overflowing_binary_op(op, &lhs, &rhs)?; - - this.write_immediate(*res, &this.project_index(&dest, i)?)?; - } + horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?; } // Used to implement the _mm_lddqu_si128 function. // Reads a 128-bit vector from an unaligned pointer. This intrinsic diff --git a/src/tools/miri/src/shims/x86/ssse3.rs b/src/tools/miri/src/shims/x86/ssse3.rs new file mode 100644 index 0000000000000..b01546722346b --- /dev/null +++ b/src/tools/miri/src/shims/x86/ssse3.rs @@ -0,0 +1,199 @@ +use rustc_middle::mir; +use rustc_span::Symbol; +use rustc_target::spec::abi::Abi; + +use super::horizontal_bin_op; +use crate::*; +use shims::foreign_items::EmulateByNameResult; + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriInterpCxExt<'mir, 'tcx> +{ + fn emulate_x86_ssse3_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx, Provenance>], + dest: &PlaceTy<'tcx, Provenance>, + ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> { + let this = self.eval_context_mut(); + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.ssse3.").unwrap(); + + match unprefixed_name { + // Used to implement the _mm_abs_epi{8,16,32} functions. + // Calculates the absolute value of packed 8/16/32-bit integers. + "pabs.b.128" | "pabs.w.128" | "pabs.d.128" => { + let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (op, op_len) = this.operand_to_simd(op)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(op_len, dest_len); + + for i in 0..dest_len { + let op = this.read_scalar(&this.project_index(&op, i)?)?; + let dest = this.project_index(&dest, i)?; + + // Converting to a host "i128" works since the input is always signed. + let res = op.to_int(dest.layout.size)?.unsigned_abs(); + + this.write_scalar(Scalar::from_uint(res, dest.layout.size), &dest)?; + } + } + // Used to implement the _mm_shuffle_epi8 intrinsic. + // Shuffles bytes from `left` using `right` as pattern. + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8 + "pshuf.b.128" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + + for i in 0..dest_len { + let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?; + let dest = this.project_index(&dest, i)?; + + let res = if right & 0x80 == 0 { + let j = right % 16; // index wraps around + this.read_scalar(&this.project_index(&left, j.into())?)? + } else { + // If the highest bit in `right` is 1, write zero. + Scalar::from_u8(0) + }; + + this.write_scalar(res, &dest)?; + } + } + // Used to implement the _mm_h{add,adds,sub}_epi{16,32} functions. + // Horizontally add / add with saturation / subtract adjacent 16/32-bit + // integer values in `left` and `right`. + "phadd.w.128" | "phadd.sw.128" | "phadd.d.128" | "phsub.w.128" | "phsub.sw.128" + | "phsub.d.128" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (which, saturating) = match unprefixed_name { + "phadd.w.128" | "phadd.d.128" => (mir::BinOp::Add, false), + "phadd.sw.128" => (mir::BinOp::Add, true), + "phsub.w.128" | "phsub.d.128" => (mir::BinOp::Sub, false), + "phsub.sw.128" => (mir::BinOp::Sub, true), + _ => unreachable!(), + }; + + horizontal_bin_op(this, which, saturating, left, right, dest)?; + } + // Used to implement the _mm_maddubs_epi16 function. + // Multiplies packed 8-bit unsigned integers from `left` and packed + // signed 8-bit integers from `right` into 16-bit signed integers. Then, + // the saturating sum of the products with indices `2*i` and `2*i+1` + // produces the output at index `i`. + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16 + "pmadd.ub.sw.128" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(left_len, right_len); + assert_eq!(dest_len.checked_mul(2).unwrap(), left_len); + + for i in 0..dest_len { + let j1 = i.checked_mul(2).unwrap(); + let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_u8()?; + let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i8()?; + + let j2 = j1.checked_add(1).unwrap(); + let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_u8()?; + let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i8()?; + + let dest = this.project_index(&dest, i)?; + + // Multiplication of a u8 and an i8 into an i16 cannot overflow. + let mul1 = i16::from(left1).checked_mul(right1.into()).unwrap(); + let mul2 = i16::from(left2).checked_mul(right2.into()).unwrap(); + let res = mul1.saturating_add(mul2); + + this.write_scalar(Scalar::from_i16(res), &dest)?; + } + } + // Used to implement the _mm_mulhrs_epi16 function. + // Multiplies packed 16-bit signed integer values, truncates the 32-bit + // product to the 18 most significant bits by right-shifting, and then + // divides the 18-bit value by 2 (rounding to nearest) by first adding + // 1 and then taking the bits `1..=16`. + // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16 + "pmul.hr.sw.128" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + + for i in 0..dest_len { + let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?; + let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?; + let dest = this.project_index(&dest, i)?; + + let res = (i32::from(left).checked_mul(right.into()).unwrap() >> 14) + .checked_add(1) + .unwrap() + >> 1; + + // The result of this operation can overflow a signed 16-bit integer. + // When `left` and `right` are -0x8000, the result is 0x8000. + #[allow(clippy::cast_possible_truncation)] + let res = res as i16; + + this.write_scalar(Scalar::from_i16(res), &dest)?; + } + } + // Used to implement the _mm_sign_epi{8,16,32} functions. + // Negates elements from `left` when the corresponding element in + // `right` is negative. If an element from `right` is zero, zero + // is writen to the corresponding output element. + // Basically, we multiply `left` with `right.signum()`. + "psign.b.128" | "psign.w.128" | "psign.d.128" => { + let [left, right] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let (left, left_len) = this.operand_to_simd(left)?; + let (right, right_len) = this.operand_to_simd(right)?; + let (dest, dest_len) = this.place_to_simd(dest)?; + + assert_eq!(dest_len, left_len); + assert_eq!(dest_len, right_len); + + for i in 0..dest_len { + let dest = this.project_index(&dest, i)?; + let left = this.read_immediate(&this.project_index(&left, i)?)?; + let right = this + .read_scalar(&this.project_index(&right, i)?)? + .to_int(dest.layout.size)?; + + let res = this.wrapping_binary_op( + mir::BinOp::Mul, + &left, + &ImmTy::from_int(right.signum(), dest.layout), + )?; + + this.write_immediate(*res, &dest)?; + } + } + _ => return Ok(EmulateByNameResult::NotSupported), + } + Ok(EmulateByNameResult::NeedsJumping) + } +} diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse3-ssse3.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse3-ssse3.rs new file mode 100644 index 0000000000000..0805d9bc30039 --- /dev/null +++ b/src/tools/miri/tests/pass/intrinsics-x86-sse3-ssse3.rs @@ -0,0 +1,395 @@ +// Ignore everything except x86 and x86_64 +// Any additional target are added to CI should be ignored here +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +// SSSE3 implicitly enables SSE3 +//@compile-flags: -C target-feature=+ssse3 + +use core::mem::transmute; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +fn main() { + // SSSE3 implicitly enables SSE3, still check it to be sure + assert!(is_x86_feature_detected!("sse3")); + assert!(is_x86_feature_detected!("ssse3")); + + unsafe { + test_sse3(); + test_ssse3(); + } +} + +#[target_feature(enable = "sse3")] +unsafe fn test_sse3() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse3.rs + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_addsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_addsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0)); + } + test_mm_addsub_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_addsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_addsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0)); + } + test_mm_addsub_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hadd_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hadd_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0)); + } + test_mm_hadd_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hadd_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_hadd_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0)); + } + test_mm_hadd_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hsub_ps() { + let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); + let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); + let r = _mm_hsub_ps(a, b); + assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0)); + } + test_mm_hsub_ps(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_hsub_pd() { + let a = _mm_setr_pd(-1.0, 5.0); + let b = _mm_setr_pd(-100.0, 20.0); + let r = _mm_hsub_pd(a, b); + assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0)); + } + test_mm_hsub_pd(); + + #[target_feature(enable = "sse3")] + unsafe fn test_mm_lddqu_si128() { + let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm_lddqu_si128(&a); + assert_eq_m128i(a, r); + } + test_mm_lddqu_si128(); +} + +#[target_feature(enable = "ssse3")] +unsafe fn test_ssse3() { + // Mostly copied from library/stdarch/crates/core_arch/src/x86/ssse3.rs + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_abs_epi8() { + let r = _mm_abs_epi8(_mm_set1_epi8(-5)); + assert_eq_m128i(r, _mm_set1_epi8(5)); + } + test_mm_abs_epi8(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_abs_epi16() { + let r = _mm_abs_epi16(_mm_set1_epi16(-5)); + assert_eq_m128i(r, _mm_set1_epi16(5)); + } + test_mm_abs_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_abs_epi32() { + let r = _mm_abs_epi32(_mm_set1_epi32(-5)); + assert_eq_m128i(r, _mm_set1_epi32(5)); + } + test_mm_abs_epi32(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_shuffle_epi8() { + let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_setr_epi8(4, 128_u8 as i8, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0); + let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1); + let r = _mm_shuffle_epi8(a, b); + assert_eq_m128i(r, expected); + + // Test indices greater than 15 wrapping around + let b = _mm_add_epi8(b, _mm_set1_epi8(32)); + let r = _mm_shuffle_epi8(a, b); + assert_eq_m128i(r, expected); + } + test_mm_shuffle_epi8(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_hadd_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25); + let r = _mm_hadd_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4); + let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4); + let expected = _mm_setr_epi16( + i16::MIN, + i16::MIN + 1, + i16::MIN + 2, + i16::MIN + 3, + i16::MAX, + i16::MAX - 1, + i16::MAX - 2, + i16::MAX - 3, + ); + let r = _mm_hadd_epi16(a, b); + assert_eq_m128i(r, expected); + } + test_mm_hadd_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_hadds_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1); + let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768); + let r = _mm_hadds_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test saturating on overflow + let a = _mm_setr_epi16(i16::MAX, 1, i16::MAX, 2, i16::MAX, 3, i16::MAX, 4); + let b = _mm_setr_epi16(i16::MIN, -1, i16::MIN, -2, i16::MIN, -3, i16::MIN, -4); + let expected = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let r = _mm_hadds_epi16(a, b); + assert_eq_m128i(r, expected); + } + test_mm_hadds_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_hadd_epi32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let b = _mm_setr_epi32(4, 128, 4, 3); + let expected = _mm_setr_epi32(3, 7, 132, 7); + let r = _mm_hadd_epi32(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi32(i32::MAX, 1, i32::MAX, 2); + let b = _mm_setr_epi32(i32::MIN, -1, i32::MIN, -2); + let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1); + let r = _mm_hadd_epi32(a, b); + assert_eq_m128i(r, expected); + } + test_mm_hadd_epi32(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_hsub_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13); + let r = _mm_hsub_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4); + let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4); + let expected = _mm_setr_epi16( + i16::MIN, + i16::MIN + 1, + i16::MIN + 2, + i16::MIN + 3, + i16::MAX, + i16::MAX - 1, + i16::MAX - 2, + i16::MAX - 3, + ); + let r = _mm_hsub_epi16(a, b); + assert_eq_m128i(r, expected); + } + test_mm_hsub_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_hsubs_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1); + let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768); + let r = _mm_hsubs_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test saturating on overflow + let a = _mm_setr_epi16(i16::MAX, -1, i16::MAX, -2, i16::MAX, -3, i16::MAX, -4); + let b = _mm_setr_epi16(i16::MIN, 1, i16::MIN, 2, i16::MIN, 3, i16::MIN, 4); + let expected = _mm_setr_epi16( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let r = _mm_hsubs_epi16(a, b); + assert_eq_m128i(r, expected); + } + test_mm_hsubs_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_hsub_epi32() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let b = _mm_setr_epi32(4, 128, 4, 3); + let expected = _mm_setr_epi32(-1, -1, -124, 1); + let r = _mm_hsub_epi32(a, b); + assert_eq_m128i(r, expected); + + // Test wrapping on overflow + let a = _mm_setr_epi32(i32::MAX, -1, i32::MAX, -2); + let b = _mm_setr_epi32(i32::MIN, 1, i32::MIN, 2); + let expected = _mm_setr_epi32(i32::MIN, i32::MIN + 1, i32::MAX, i32::MAX - 1); + let r = _mm_hsub_epi32(a, b); + assert_eq_m128i(r, expected); + } + test_mm_hsub_epi32(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_maddubs_epi16() { + let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = _mm_setr_epi8(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0); + let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120); + let r = _mm_maddubs_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test widening and saturation + let a = _mm_setr_epi8( + u8::MAX as i8, + u8::MAX as i8, + u8::MAX as i8, + u8::MAX as i8, + u8::MAX as i8, + u8::MAX as i8, + 100, + 100, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + let b = _mm_setr_epi8( + i8::MAX, + i8::MAX, + i8::MAX, + i8::MIN, + i8::MIN, + i8::MIN, + 50, + 15, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + let expected = _mm_setr_epi16(i16::MAX, -255, i16::MIN, 6500, 0, 0, 0, 0); + let r = _mm_maddubs_epi16(a, b); + assert_eq_m128i(r, expected); + } + test_mm_maddubs_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_mulhrs_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1); + let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0); + let r = _mm_mulhrs_epi16(a, b); + assert_eq_m128i(r, expected); + + // Test extreme values + let a = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MIN, 0, 0, 0, 0, 0); + let b = _mm_setr_epi16(i16::MAX, i16::MIN, i16::MAX, 0, 0, 0, 0, 0); + let expected = _mm_setr_epi16(i16::MAX - 1, i16::MIN, -i16::MAX, 0, 0, 0, 0, 0); + let r = _mm_mulhrs_epi16(a, b); + assert_eq_m128i(r, expected); + } + test_mm_mulhrs_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_sign_epi8() { + let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16); + let b = _mm_setr_epi8(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0); + let expected = _mm_setr_epi8(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0); + let r = _mm_sign_epi8(a, b); + assert_eq_m128i(r, expected); + } + test_mm_sign_epi8(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_sign_epi16() { + let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8); + let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1); + let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8); + let r = _mm_sign_epi16(a, b); + assert_eq_m128i(r, expected); + } + test_mm_sign_epi16(); + + #[target_feature(enable = "ssse3")] + unsafe fn test_mm_sign_epi32() { + let a = _mm_setr_epi32(-1, 2, 3, 4); + let b = _mm_setr_epi32(1, -1, 1, 0); + let expected = _mm_setr_epi32(-1, -2, 3, 0); + let r = _mm_sign_epi32(a, b); + assert_eq_m128i(r, expected); + } + test_mm_sign_epi32(); +} + +#[track_caller] +#[target_feature(enable = "sse")] +unsafe fn assert_eq_m128(a: __m128, b: __m128) { + let r = _mm_cmpeq_ps(a, b); + if _mm_movemask_ps(r) != 0b1111 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { + if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { + panic!("{:?} != {:?}", a, b); + } +} + +#[track_caller] +#[target_feature(enable = "sse2")] +pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { + assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) +} diff --git a/src/tools/miri/tests/pass/intrinsics-x86-sse3.rs b/src/tools/miri/tests/pass/intrinsics-x86-sse3.rs deleted file mode 100644 index ea92bf45acbd5..0000000000000 --- a/src/tools/miri/tests/pass/intrinsics-x86-sse3.rs +++ /dev/null @@ -1,114 +0,0 @@ -// Ignore everything except x86 and x86_64 -// Any additional target are added to CI should be ignored here -// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) -//@ignore-target-aarch64 -//@ignore-target-arm -//@ignore-target-avr -//@ignore-target-s390x -//@ignore-target-thumbv7em -//@ignore-target-wasm32 -//@compile-flags: -C target-feature=+sse3 - -use core::mem::transmute; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -fn main() { - assert!(is_x86_feature_detected!("sse3")); - - unsafe { - test_sse3(); - } -} - -#[target_feature(enable = "sse3")] -unsafe fn test_sse3() { - // Mostly copied from library/stdarch/crates/core_arch/src/x86/sse3.rs - - #[target_feature(enable = "sse3")] - unsafe fn test_mm_addsub_ps() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_addsub_ps(a, b); - assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0)); - } - test_mm_addsub_ps(); - - #[target_feature(enable = "sse3")] - unsafe fn test_mm_addsub_pd() { - let a = _mm_setr_pd(-1.0, 5.0); - let b = _mm_setr_pd(-100.0, 20.0); - let r = _mm_addsub_pd(a, b); - assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0)); - } - test_mm_addsub_pd(); - - #[target_feature(enable = "sse3")] - unsafe fn test_mm_hadd_ps() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_hadd_ps(a, b); - assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0)); - } - test_mm_hadd_ps(); - - #[target_feature(enable = "sse3")] - unsafe fn test_mm_hadd_pd() { - let a = _mm_setr_pd(-1.0, 5.0); - let b = _mm_setr_pd(-100.0, 20.0); - let r = _mm_hadd_pd(a, b); - assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0)); - } - test_mm_hadd_pd(); - - #[target_feature(enable = "sse3")] - unsafe fn test_mm_hsub_ps() { - let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); - let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); - let r = _mm_hsub_ps(a, b); - assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0)); - } - test_mm_hsub_ps(); - - #[target_feature(enable = "sse3")] - unsafe fn test_mm_hsub_pd() { - let a = _mm_setr_pd(-1.0, 5.0); - let b = _mm_setr_pd(-100.0, 20.0); - let r = _mm_hsub_pd(a, b); - assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0)); - } - test_mm_hsub_pd(); - - #[target_feature(enable = "sse3")] - unsafe fn test_mm_lddqu_si128() { - let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r = _mm_lddqu_si128(&a); - assert_eq_m128i(a, r); - } - test_mm_lddqu_si128(); -} - -#[track_caller] -#[target_feature(enable = "sse")] -unsafe fn assert_eq_m128(a: __m128, b: __m128) { - let r = _mm_cmpeq_ps(a, b); - if _mm_movemask_ps(r) != 0b1111 { - panic!("{:?} != {:?}", a, b); - } -} - -#[track_caller] -#[target_feature(enable = "sse2")] -unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { - if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { - panic!("{:?} != {:?}", a, b); - } -} - -#[track_caller] -#[target_feature(enable = "sse2")] -pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { - assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b)) -} From 97bb8c80ed79f74bf669ffebef53326d1fc1aabb Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sat, 30 Sep 2023 15:49:40 +0200 Subject: [PATCH 07/11] Bump libloading to 0.8 --- src/tools/miri/Cargo.lock | 6 +++--- src/tools/miri/Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tools/miri/Cargo.lock b/src/tools/miri/Cargo.lock index 5628d0697ed99..3f11e4db327e6 100644 --- a/src/tools/miri/Cargo.lock +++ b/src/tools/miri/Cargo.lock @@ -409,12 +409,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +checksum = "d580318f95776505201b28cf98eb1fa5e4be3b689633ba6a3e6cd880ff22d8cb" dependencies = [ "cfg-if", - "winapi", + "windows-sys 0.48.0", ] [[package]] diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml index 0df2c160e2f23..c911a153c1373 100644 --- a/src/tools/miri/Cargo.toml +++ b/src/tools/miri/Cargo.toml @@ -32,7 +32,7 @@ libc = "0.2" [target.'cfg(target_os = "linux")'.dependencies] libffi = "3.2.0" -libloading = "0.7" +libloading = "0.8" [dev-dependencies] colored = "2" From d62833861500038cdceb3259fece79d9a9b67e9f Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 30 Sep 2023 23:29:13 +0200 Subject: [PATCH 08/11] Preparing for merge from rustc --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 07dd52ce94159..b60de8344d948 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -2ba4eb2d49e774b5fbc2a06258ac7b0f60b92b7e +bb6c66be3793ac5c738eeac91ecdc4b99388d0b4 From 45d5733ccbd6300ece6b7bf18111e4a5b4daf774 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 30 Sep 2023 23:30:51 +0200 Subject: [PATCH 09/11] fmt --- src/tools/miri/src/shims/intrinsics/simd.rs | 11 +++++++---- src/tools/miri/tests/pass/portable-simd.rs | 14 ++++---------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/tools/miri/src/shims/intrinsics/simd.rs b/src/tools/miri/src/shims/intrinsics/simd.rs index 5e488679b81d6..0dc669849fe0c 100644 --- a/src/tools/miri/src/shims/intrinsics/simd.rs +++ b/src/tools/miri/src/shims/intrinsics/simd.rs @@ -495,16 +495,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let (right, right_len) = this.operand_to_simd(right)?; let (dest, dest_len) = this.place_to_simd(dest)?; - let index = generic_args[2].expect_const().eval(*this.tcx, this.param_env(), Some(this.tcx.span)).unwrap().unwrap_branch(); + let index = generic_args[2] + .expect_const() + .eval(*this.tcx, this.param_env(), Some(this.tcx.span)) + .unwrap() + .unwrap_branch(); let index_len = index.len(); assert_eq!(left_len, right_len); assert_eq!(index_len as u64, dest_len); for i in 0..dest_len { - let src_index: u64 = index[i as usize].unwrap_leaf() - .try_to_u32().unwrap() - .into(); + let src_index: u64 = + index[i as usize].unwrap_leaf().try_to_u32().unwrap().into(); let dest = this.project_index(&dest, i)?; let val = if src_index < left_len { diff --git a/src/tools/miri/tests/pass/portable-simd.rs b/src/tools/miri/tests/pass/portable-simd.rs index c1f5618c16346..969162e2c1e9e 100644 --- a/src/tools/miri/tests/pass/portable-simd.rs +++ b/src/tools/miri/tests/pass/portable-simd.rs @@ -416,20 +416,14 @@ fn simd_intrinsics() { simd_select(i8x4::from_array([0, -1, -1, 0]), b, a), i32x4::from_array([10, 2, 10, 10]) ); + assert_eq!(simd_shuffle_generic::<_, i32x4, { &[3, 1, 0, 2] }>(a, b), a,); + assert_eq!(simd_shuffle::<_, _, i32x4>(a, b, const { [3, 1, 0, 2] }), a,); assert_eq!( - simd_shuffle_generic::<_, i32x4, {&[3, 1, 0, 2]}>(a, b), - a, - ); - assert_eq!( - simd_shuffle::<_, _, i32x4>(a, b, const {[3, 1, 0, 2]}), - a, - ); - assert_eq!( - simd_shuffle_generic::<_, i32x4, {&[7, 5, 4, 6]}>(a, b), + simd_shuffle_generic::<_, i32x4, { &[7, 5, 4, 6] }>(a, b), i32x4::from_array([4, 2, 1, 10]), ); assert_eq!( - simd_shuffle::<_, _, i32x4>(a, b, const {[7, 5, 4, 6]}), + simd_shuffle::<_, _, i32x4>(a, b, const { [7, 5, 4, 6] }), i32x4::from_array([4, 2, 1, 10]), ); } From 119113114cfe77ccec1fd69d6b6fd42422181978 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 30 Sep 2023 23:32:13 +0200 Subject: [PATCH 10/11] clippy --- src/tools/miri/src/shims/intrinsics/simd.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tools/miri/src/shims/intrinsics/simd.rs b/src/tools/miri/src/shims/intrinsics/simd.rs index 0dc669849fe0c..200f37efa27b1 100644 --- a/src/tools/miri/src/shims/intrinsics/simd.rs +++ b/src/tools/miri/src/shims/intrinsics/simd.rs @@ -506,8 +506,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { assert_eq!(index_len as u64, dest_len); for i in 0..dest_len { - let src_index: u64 = - index[i as usize].unwrap_leaf().try_to_u32().unwrap().into(); + let src_index: u64 = index[usize::try_from(i).unwrap()] + .unwrap_leaf() + .try_to_u32() + .unwrap() + .into(); let dest = this.project_index(&dest, i)?; let val = if src_index < left_len { From 6687c075dfb861f0c51439a34d9bd674b7ef54a9 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 1 Oct 2023 07:03:42 +0200 Subject: [PATCH 11/11] update lockfile --- Cargo.lock | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 476f5578187b4..7f91d12a419c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2136,6 +2136,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "libm" version = "0.1.4" @@ -2411,7 +2421,7 @@ dependencies = [ "lazy_static", "libc", "libffi", - "libloading", + "libloading 0.8.1", "log", "measureme", "rand", @@ -3914,7 +3924,7 @@ dependencies = [ name = "rustc_interface" version = "0.0.0" dependencies = [ - "libloading", + "libloading 0.7.4", "rustc-rayon", "rustc-rayon-core", "rustc_ast", @@ -4045,7 +4055,7 @@ name = "rustc_metadata" version = "0.0.0" dependencies = [ "bitflags 1.3.2", - "libloading", + "libloading 0.7.4", "odht", "rustc_ast", "rustc_attr", @@ -4260,7 +4270,7 @@ dependencies = [ name = "rustc_plugin_impl" version = "0.0.0" dependencies = [ - "libloading", + "libloading 0.7.4", "rustc_ast", "rustc_errors", "rustc_fluent_macro",