From aba9587a3064024aa2ffc2f7d96ba576f11b267a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Sun, 28 Feb 2021 03:48:43 +0100 Subject: [PATCH] Convert some SSE2 intrinsics to const generics (#1021) --- crates/core_arch/src/macros.rs | 58 ++----- crates/core_arch/src/x86/avx512bw.rs | 12 +- crates/core_arch/src/x86/avx512f.rs | 8 +- crates/core_arch/src/x86/sse.rs | 2 +- crates/core_arch/src/x86/sse2.rs | 226 +++++++-------------------- 5 files changed, 87 insertions(+), 219 deletions(-) diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index ab643d9a29..710f8531fd 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -1,5 +1,21 @@ //! Utility macros. +// Helper struct used to trigger const eval errors when a const generic immediate value is +// out of range. +pub(crate) struct ValidateConstImm8(); +impl ValidateConstImm8 { + pub(crate) const VALID: () = { + let _ = 1 / ((imm8 >= 0 && imm8 <= 255) as usize); + }; +} + +#[allow(unused)] +macro_rules! static_assert_imm8 { + ($imm:ident) => { + let _ = $crate::core_arch::macros::ValidateConstImm8::<$imm>::VALID; + }; +} + #[allow(unused)] macro_rules! static_assert { ($imm:ident : $ty:ty where $e:expr) => { @@ -320,48 +336,6 @@ macro_rules! constify_imm5 { }; } -//immediate value: -16:15 -#[allow(unused)] -macro_rules! constify_imm5 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1_1111 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - 3 => $expand!(3), - 4 => $expand!(4), - 5 => $expand!(5), - 6 => $expand!(6), - 7 => $expand!(7), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - 12 => $expand!(12), - 13 => $expand!(13), - 14 => $expand!(14), - 15 => $expand!(15), - 16 => $expand!(16), - 17 => $expand!(17), - 18 => $expand!(18), - 19 => $expand!(19), - 20 => $expand!(20), - 21 => $expand!(21), - 22 => $expand!(22), - 23 => $expand!(23), - 24 => $expand!(24), - 25 => $expand!(25), - 26 => $expand!(26), - 27 => $expand!(27), - 28 => $expand!(28), - 29 => $expand!(29), - 30 => $expand!(30), - _ => $expand!(31), - } - }; -} - //immediate value: 0:16 #[allow(unused)] macro_rules! constify_imm4 { diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index f4dc00bd87..2abb335434 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -5858,7 +5858,7 @@ pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __ pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_srai_epi16(a, $imm8) + _mm_srai_epi16::<$imm8>(a) }; } let shf = constify_imm8_sae!(imm8, call); @@ -5875,7 +5875,7 @@ pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_srai_epi16(a, $imm8) + _mm_srai_epi16::<$imm8>(a) }; } let shf = constify_imm8_sae!(imm8, call); @@ -7414,7 +7414,7 @@ pub unsafe fn _mm_mask_shufflelo_epi16( ) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_shufflelo_epi16(a, $imm8) + _mm_shufflelo_epi16::<$imm8>(a) }; } let shuffle = constify_imm8_sae!(imm8, call); @@ -7431,7 +7431,7 @@ pub unsafe fn _mm_mask_shufflelo_epi16( pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_shufflelo_epi16(a, $imm8) + _mm_shufflelo_epi16::<$imm8>(a) }; } let shuffle = constify_imm8_sae!(imm8, call); @@ -7592,7 +7592,7 @@ pub unsafe fn _mm_mask_shufflehi_epi16( ) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_shufflehi_epi16(a, $imm8) + _mm_shufflehi_epi16::<$imm8>(a) }; } let shuffle = constify_imm8_sae!(imm8, call); @@ -7609,7 +7609,7 @@ pub unsafe fn _mm_mask_shufflehi_epi16( pub unsafe fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_shufflehi_epi16(a, $imm8) + _mm_shufflehi_epi16::<$imm8>(a) }; } let shuffle = constify_imm8_sae!(imm8, call); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 6ba96989ab..9fbfb209db 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -19238,7 +19238,7 @@ pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_srai_epi32(a, $imm8) + _mm_srai_epi32::<$imm8>(a) }; } let shf = constify_imm8_sae!(imm8, call); @@ -19255,7 +19255,7 @@ pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_srai_epi32(a, $imm8) + _mm_srai_epi32::<$imm8>(a) }; } let shf = constify_imm8_sae!(imm8, call); @@ -22495,7 +22495,7 @@ pub unsafe fn _mm_mask_shuffle_epi32( ) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_shuffle_epi32(a, $imm8) + _mm_shuffle_epi32::<$imm8>(a) }; } let r = constify_imm8_sae!(imm8, call); @@ -22512,7 +22512,7 @@ pub unsafe fn _mm_mask_shuffle_epi32( pub unsafe fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i, imm8: _MM_PERM_ENUM) -> __m128i { macro_rules! call { ($imm8:expr) => { - _mm_shuffle_epi32(a, $imm8) + _mm_shuffle_epi32::<$imm8>(a) }; } let r = constify_imm8_sae!(imm8, call); diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 3e7b54e302..98836518da 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1010,7 +1010,7 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128) -> __m128 { - static_assert!(mask: i32 where mask >= 0 && mask <= 255); + static_assert_imm8!(mask); simd_shuffle4( a, b, diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index 12e7506f02..ae98df5288 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -594,16 +594,11 @@ pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psraw, imm8 = 1))] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i { - let a = a.as_i16x8(); - macro_rules! call { - ($imm8:expr) => { - transmute(psraiw(a, $imm8)) - }; - } - constify_imm8!(imm8, call) +pub unsafe fn _mm_srai_epi16(a: __m128i) -> __m128i { + static_assert_imm8!(imm8); + transmute(psraiw(a.as_i16x8(), imm8)) } /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign @@ -625,16 +620,11 @@ pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrad, imm8 = 1))] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i { - let a = a.as_i32x4(); - macro_rules! call { - ($imm8:expr) => { - transmute(psraid(a, $imm8)) - }; - } - constify_imm8!(imm8, call) +pub unsafe fn _mm_srai_epi32(a: __m128i) -> __m128i { + static_assert_imm8!(imm8); + transmute(psraid(a.as_i32x4(), imm8)) } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign @@ -1461,60 +1451,21 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { - // simd_shuffleX requires that its selector parameter be made up of - // constant values, but we can't enforce that here. In spirit, we need - // to write a `match` on all possible values of a byte, and for each value, - // hard-code the correct `simd_shuffleX` call using only constants. We - // then hope for LLVM to do the rest. - // - // Of course, that's... awful. So we try to use macros to do it for us. - let imm8 = (imm8 & 0xFF) as u8; +pub unsafe fn _mm_shuffle_epi32(a: __m128i) -> __m128i { + static_assert_imm8!(imm8); let a = a.as_i32x4(); - - macro_rules! shuffle_done { - ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { - simd_shuffle4(a, a, [$x01, $x23, $x45, $x67]) - }; - } - macro_rules! shuffle_x67 { - ($x01:expr, $x23:expr, $x45:expr) => { - match (imm8 >> 6) & 0b11 { - 0b00 => shuffle_done!($x01, $x23, $x45, 0), - 0b01 => shuffle_done!($x01, $x23, $x45, 1), - 0b10 => shuffle_done!($x01, $x23, $x45, 2), - _ => shuffle_done!($x01, $x23, $x45, 3), - } - }; - } - macro_rules! shuffle_x45 { - ($x01:expr, $x23:expr) => { - match (imm8 >> 4) & 0b11 { - 0b00 => shuffle_x67!($x01, $x23, 0), - 0b01 => shuffle_x67!($x01, $x23, 1), - 0b10 => shuffle_x67!($x01, $x23, 2), - _ => shuffle_x67!($x01, $x23, 3), - } - }; - } - macro_rules! shuffle_x23 { - ($x01:expr) => { - match (imm8 >> 2) & 0b11 { - 0b00 => shuffle_x45!($x01, 0), - 0b01 => shuffle_x45!($x01, 1), - 0b10 => shuffle_x45!($x01, 2), - _ => shuffle_x45!($x01, 3), - } - }; - } - let x: i32x4 = match imm8 & 0b11 { - 0b00 => shuffle_x23!(0), - 0b01 => shuffle_x23!(1), - 0b10 => shuffle_x23!(2), - _ => shuffle_x23!(3), - }; + let x: i32x4 = simd_shuffle4( + a, + a, + [ + imm8 as u32 & 0b11, + (imm8 as u32 >> 2) & 0b11, + (imm8 as u32 >> 4) & 0b11, + (imm8 as u32 >> 6) & 0b11, + ], + ); transmute(x) } @@ -1528,53 +1479,25 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { - // See _mm_shuffle_epi32. - let imm8 = (imm8 & 0xFF) as u8; +pub unsafe fn _mm_shufflehi_epi16(a: __m128i) -> __m128i { + static_assert_imm8!(imm8); let a = a.as_i16x8(); - macro_rules! shuffle_done { - ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { - simd_shuffle8(a, a, [0, 1, 2, 3, $x01 + 4, $x23 + 4, $x45 + 4, $x67 + 4]) - }; - } - macro_rules! shuffle_x67 { - ($x01:expr, $x23:expr, $x45:expr) => { - match (imm8 >> 6) & 0b11 { - 0b00 => shuffle_done!($x01, $x23, $x45, 0), - 0b01 => shuffle_done!($x01, $x23, $x45, 1), - 0b10 => shuffle_done!($x01, $x23, $x45, 2), - _ => shuffle_done!($x01, $x23, $x45, 3), - } - }; - } - macro_rules! shuffle_x45 { - ($x01:expr, $x23:expr) => { - match (imm8 >> 4) & 0b11 { - 0b00 => shuffle_x67!($x01, $x23, 0), - 0b01 => shuffle_x67!($x01, $x23, 1), - 0b10 => shuffle_x67!($x01, $x23, 2), - _ => shuffle_x67!($x01, $x23, 3), - } - }; - } - macro_rules! shuffle_x23 { - ($x01:expr) => { - match (imm8 >> 2) & 0b11 { - 0b00 => shuffle_x45!($x01, 0), - 0b01 => shuffle_x45!($x01, 1), - 0b10 => shuffle_x45!($x01, 2), - _ => shuffle_x45!($x01, 3), - } - }; - } - let x: i16x8 = match imm8 & 0b11 { - 0b00 => shuffle_x23!(0), - 0b01 => shuffle_x23!(1), - 0b10 => shuffle_x23!(2), - _ => shuffle_x23!(3), - }; + let x: i16x8 = simd_shuffle8( + a, + a, + [ + 0, + 1, + 2, + 3, + (imm8 as u32 & 0b11) + 4, + ((imm8 as u32 >> 2) & 0b11) + 4, + ((imm8 as u32 >> 4) & 0b11) + 4, + ((imm8 as u32 >> 6) & 0b11) + 4, + ], + ); transmute(x) } @@ -1588,54 +1511,25 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))] -#[rustc_args_required_const(1)] +#[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i { - // See _mm_shuffle_epi32. - let imm8 = (imm8 & 0xFF) as u8; +pub unsafe fn _mm_shufflelo_epi16(a: __m128i) -> __m128i { + static_assert_imm8!(imm8); let a = a.as_i16x8(); - - macro_rules! shuffle_done { - ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { - simd_shuffle8(a, a, [$x01, $x23, $x45, $x67, 4, 5, 6, 7]) - }; - } - macro_rules! shuffle_x67 { - ($x01:expr, $x23:expr, $x45:expr) => { - match (imm8 >> 6) & 0b11 { - 0b00 => shuffle_done!($x01, $x23, $x45, 0), - 0b01 => shuffle_done!($x01, $x23, $x45, 1), - 0b10 => shuffle_done!($x01, $x23, $x45, 2), - _ => shuffle_done!($x01, $x23, $x45, 3), - } - }; - } - macro_rules! shuffle_x45 { - ($x01:expr, $x23:expr) => { - match (imm8 >> 4) & 0b11 { - 0b00 => shuffle_x67!($x01, $x23, 0), - 0b01 => shuffle_x67!($x01, $x23, 1), - 0b10 => shuffle_x67!($x01, $x23, 2), - _ => shuffle_x67!($x01, $x23, 3), - } - }; - } - macro_rules! shuffle_x23 { - ($x01:expr) => { - match (imm8 >> 2) & 0b11 { - 0b00 => shuffle_x45!($x01, 0), - 0b01 => shuffle_x45!($x01, 1), - 0b10 => shuffle_x45!($x01, 2), - _ => shuffle_x45!($x01, 3), - } - }; - } - let x: i16x8 = match imm8 & 0b11 { - 0b00 => shuffle_x23!(0), - 0b01 => shuffle_x23!(1), - 0b10 => shuffle_x23!(2), - _ => shuffle_x23!(3), - }; + let x: i16x8 = simd_shuffle8( + a, + a, + [ + imm8 as u32 & 0b11, + (imm8 as u32 >> 2) & 0b11, + (imm8 as u32 >> 4) & 0b11, + (imm8 as u32 >> 6) & 0b11, + 4, + 5, + 6, + 7, + ], + ); transmute(x) } @@ -3594,7 +3488,7 @@ mod tests { #[simd_test(enable = "sse2")] unsafe fn test_mm_srai_epi16() { - let r = _mm_srai_epi16(_mm_set1_epi16(-1), 1); + let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1)); assert_eq_m128i(r, _mm_set1_epi16(-1)); } @@ -3608,7 +3502,7 @@ mod tests { #[simd_test(enable = "sse2")] unsafe fn test_mm_srai_epi32() { - let r = _mm_srai_epi32(_mm_set1_epi32(-1), 1); + let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1)); assert_eq_m128i(r, _mm_set1_epi32(-1)); } @@ -4107,7 +4001,7 @@ mod tests { #[simd_test(enable = "sse2")] unsafe fn test_mm_shuffle_epi32() { let a = _mm_setr_epi32(5, 10, 15, 20); - let r = _mm_shuffle_epi32(a, 0b00_01_01_11); + let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); let e = _mm_setr_epi32(20, 10, 10, 5); assert_eq_m128i(r, e); } @@ -4115,7 +4009,7 @@ mod tests { #[simd_test(enable = "sse2")] unsafe fn test_mm_shufflehi_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); - let r = _mm_shufflehi_epi16(a, 0b00_01_01_11); + let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); assert_eq_m128i(r, e); } @@ -4123,7 +4017,7 @@ mod tests { #[simd_test(enable = "sse2")] unsafe fn test_mm_shufflelo_epi16() { let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); - let r = _mm_shufflelo_epi16(a, 0b00_01_01_11); + let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); assert_eq_m128i(r, e); }