From f67eb4e18c005273a1c891e6c79a5cb343a1c548 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 19 Jan 2018 20:15:07 -0600 Subject: [PATCH] Remove Into/From between x86 and portable types (#292) This is primarily done to avoid falling into a portability trap by accident, and in general moves the vendor types (on x86) towards being as minimal as they can be. Along the way, some tests which were still using the portable types were cleaned up. --- coresimd/src/v128.rs | 33 +--- coresimd/src/v256.rs | 33 +--- coresimd/src/v64.rs | 13 +- coresimd/src/x86/i586/sse3.rs | 2 +- coresimd/src/x86/i586/sse42.rs | 168 +++++++++---------- coresimd/src/x86/i686/mmx.rs | 291 ++++++++++++++++----------------- coresimd/src/x86/i686/ssse3.rs | 153 +++++++++-------- examples/play.rs | 68 -------- 8 files changed, 316 insertions(+), 445 deletions(-) delete mode 100644 examples/play.rs diff --git a/coresimd/src/v128.rs b/coresimd/src/v128.rs index 870b8615705c7..21426a904a721 100644 --- a/coresimd/src/v128.rs +++ b/coresimd/src/v128.rs @@ -55,8 +55,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( i64x2, @@ -66,8 +65,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( u32x4, @@ -77,8 +75,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( i32x4, @@ -88,8 +85,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( u16x8, @@ -99,8 +95,7 @@ define_from!( i32x4, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( i16x8, @@ -110,8 +105,7 @@ define_from!( i32x4, u16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( u8x16, @@ -121,22 +115,9 @@ define_from!( i32x4, u16x8, i16x8, - i8x16, - __m128i -); -define_from!( - i8x16, - u64x2, - i64x2, - u32x4, - i32x4, - u16x8, - i16x8, - u8x16, - __m128i + i8x16 ); define_from!( - __m128i, i8x16, u64x2, i64x2, diff --git a/coresimd/src/v256.rs b/coresimd/src/v256.rs index f21d7e99f5ed4..8f1993108430a 100644 --- 
a/coresimd/src/v256.rs +++ b/coresimd/src/v256.rs @@ -80,8 +80,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( i64x4, @@ -91,8 +90,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( u32x8, @@ -102,8 +100,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( i32x8, @@ -113,8 +110,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( u16x16, @@ -124,8 +120,7 @@ define_from!( i32x8, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( i16x16, @@ -135,8 +130,7 @@ define_from!( i32x8, u16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( u8x32, @@ -146,22 +140,9 @@ define_from!( i32x8, u16x16, i16x16, - i8x32, - __m256i -); -define_from!( - i8x32, - u64x4, - i64x4, - u32x8, - i32x8, - u16x16, - i16x16, - u8x32, - __m256i + i8x32 ); define_from!( - __m256i, i8x32, u64x4, i64x4, diff --git a/coresimd/src/v64.rs b/coresimd/src/v64.rs index 132276c0a3e05..f125fad7ab9b1 100644 --- a/coresimd/src/v64.rs +++ b/coresimd/src/v64.rs @@ -35,13 +35,12 @@ define_ty_doc! { /// 64-bit wide integer vector type. 
} -define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8, __m64); -define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8, __m64); -define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8, __m64); -define_from!(i16x4, u32x2, i32x2, u16x4, u8x8, i8x8, __m64); -define_from!(u8x8, u32x2, i32x2, u16x4, i16x4, i8x8, __m64); -define_from!(i8x8, u32x2, i32x2, u16x4, i16x4, u8x8, __m64); -define_from!(__m64, i8x8, u32x2, i32x2, u16x4, i16x4, u8x8); +define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8); +define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8); +define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8); +define_from!(i16x4, u32x2, i32x2, u16x4, u8x8, i8x8); +define_from!(u8x8, u32x2, i32x2, u16x4, i16x4, i8x8); +define_from!(i8x8, u32x2, i32x2, u16x4, i16x4, u8x8); define_common_ops!(f32x2, u32x2, i32x2, u16x4, i16x4, u8x8, i8x8); define_float_ops!(f32x2); diff --git a/coresimd/src/x86/i586/sse3.rs b/coresimd/src/x86/i586/sse3.rs index 212a3395c1326..35525b3ea80fc 100644 --- a/coresimd/src/x86/i586/sse3.rs +++ b/coresimd/src/x86/i586/sse3.rs @@ -68,7 +68,7 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(lddqu))] pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i { - __m128i::from(lddqu(mem_addr as *const _)) + mem::transmute(lddqu(mem_addr as *const _)) } /// Duplicate the low double-precision (64-bit) floating-point element diff --git a/coresimd/src/x86/i586/sse42.rs b/coresimd/src/x86/i586/sse42.rs index d55f3f5645b13..81dfa53adfada 100644 --- a/coresimd/src/x86/i586/sse42.rs +++ b/coresimd/src/x86/i586/sse42.rs @@ -105,24 +105,21 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ORDERED}; +/// use stdsimd::vendor::*; /// /// let haystack = b"This is a long string of text data\r\n\tthat 
extends /// multiple lines"; /// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0"; /// -/// let a = u8x16::load(needle, 0); +/// let a = _mm_loadu_si128(needle.as_ptr() as *const _); /// let hop = 16; /// let mut indexes = Vec::new(); /// /// // Chunk the haystack into 16 byte chunks and find /// // the first "\r\n\t" in the chunk. /// for (i, chunk) in haystack.chunks(hop).enumerate() { -/// let b = u8x16::load(chunk, 0); -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ORDERED) -/// }; +/// let b = _mm_loadu_si128(chunk.as_ptr() as *const _); +/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); /// if idx != 16 { /// indexes.push((idx as usize) + (i * hop)); /// } @@ -147,21 +144,18 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # if cfg_feature_enabled!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ANY}; +/// use stdsimd::vendor::*; /// /// // Ensure your input is 16 byte aligned /// let password = b"hunter2\0\0\0\0\0\0\0\0\0"; /// let special_chars = b"!@#$%^&*()[]:;<>"; /// /// // Load the input -/// let a = u8x16::load(special_chars, 0); -/// let b = u8x16::load(password, 0); +/// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _); +/// let b = _mm_loadu_si128(password.as_ptr() as *const _); /// /// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY) -/// }; +/// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY); /// /// if idx < 16 { /// println!("Congrats! 
Your password contains a special character"); @@ -188,20 +182,18 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # if cfg_feature_enabled!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_RANGES}; -/// # let b = u8x16::load(b":;<=>?@[\\]^_`abc", 0); +/// use stdsimd::vendor::*; +/// # let b = b":;<=>?@[\\]^_`abc"; +/// # let b = _mm_loadu_si128(b.as_ptr() as *const _); /// /// // Specify the ranges of values to be searched for [A-Za-z0-9]. -/// let a = u8x16::load(b"AZaz09\0\0\0\0\0\0\0\0\0\0", 0); +/// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0"; +/// let a = _mm_loadu_si128(a.as_ptr() as *const _); /// /// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges. /// // Which in this case will be the first alpha numeric byte found /// // in the string. -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_RANGES) -/// }; -/// +/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES); /// /// if idx < 16 { /// println!("Found an alpha numeric character"); @@ -227,23 +219,19 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # if cfg_feature_enabled!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { -/// use stdsimd::simd::u16x8; -/// use stdsimd::vendor::{_mm_cmpistri}; -/// use stdsimd::vendor::{_SIDD_UWORD_OPS, _SIDD_CMP_EQUAL_EACH}; +/// use stdsimd::vendor::*; /// /// # let mut some_utf16_words = [0u16; 8]; /// # let mut more_utf16_words = [0u16; 8]; /// # '❤'.encode_utf16(&mut some_utf16_words); /// # '𝕊'.encode_utf16(&mut more_utf16_words); /// // Load the input -/// let a = u16x8::load(&some_utf16_words, 0); -/// let b = u16x8::load(&more_utf16_words, 0); +/// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _); +/// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _); /// /// // Specify _SIDD_UWORD_OPS to 
compare words instead of bytes, and /// // use _SIDD_CMP_EQUAL_EACH to compare the two strings. -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_UWORD_OPS | -/// _SIDD_CMP_EQUAL_EACH) }; +/// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH); /// /// if idx == 0 { /// println!("16-bit unicode strings were equal!"); @@ -367,9 +355,9 @@ pub unsafe fn _mm_cmpestrm( let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { - ($imm8:expr) => { __m128i::from(pcmpestrm128(a, la, b, lb, $imm8)) } + ($imm8:expr) => { pcmpestrm128(a, la, b, lb, $imm8) } } - constify_imm8!(imm8, call) + mem::transmute(constify_imm8!(imm8, call)) } /// Compare packed strings `a` and `b` with lengths `la` and `lb` using the @@ -415,8 +403,7 @@ pub unsafe fn _mm_cmpestrm( /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpestri, _SIDD_CMP_EQUAL_ORDERED}; +/// use stdsimd::vendor::*; /// /// // The string we want to find a substring in /// let haystack = b"Split \r\n\t line "; @@ -425,14 +412,12 @@ pub unsafe fn _mm_cmpestrm( /// // extra bytes we do not want to search for. /// let needle = b"\r\n\t ignore this "; /// -/// let a = u8x16::load(needle, 0); -/// let b = u8x16::load(haystack, 0); +/// let a = _mm_loadu_si128(needle.as_ptr() as *const _); +/// let b = _mm_loadu_si128(haystack.as_ptr() as *const _); /// /// // Note: We explicitly specify we only want to search `b` for the /// // first 3 characters of a. -/// let idx = unsafe { -/// _mm_cmpestri(a.into(), 3, b.into(), 15, _SIDD_CMP_EQUAL_ORDERED) -/// }; +/// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED); /// /// assert_eq!(idx, 6); /// # } @@ -626,8 +611,7 @@ mod tests { use stdsimd_test::simd_test; use std::ptr; - use v128::*; - use x86::i586::sse42; + use x86::*; // Currently one cannot `load` a &[u8] that is is less than 16 // in length. 
This makes loading strings less than 16 in length @@ -643,179 +627,179 @@ mod tests { slice.get_unchecked_mut(0) as *mut u8 as *mut u8, s.len(), ); - __m128i::from(u8x16::load(slice, 0)) + _mm_loadu_si128(slice.as_ptr() as *const _) } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrm() { + unsafe fn test_mm_cmpistrm() { let a = str_to_m128i(b"Hello! Good-Bye!"); let b = str_to_m128i(b"hello! good-bye!"); - let i = sse42::_mm_cmpistrm(a, b, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpistrm(a, b, _SIDD_UNIT_MASK); #[cfg_attr(rustfmt, rustfmt_skip)] - let res = u8x16::new( - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, + let res = _mm_setr_epi8( + 0x00, !0, !0, !0, !0, !0, !0, 0x00, + !0, !0, !0, !0, 0x00, !0, !0, !0, ); - assert_eq!(i, __m128i::from(res)); + assert_eq!(i, res); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistri() { + unsafe fn test_mm_cmpistri() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b" Hello "); - let i = sse42::_mm_cmpistri(a, b, sse42::_SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(3, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrz() { + unsafe fn test_mm_cmpistrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); - let i = sse42::_mm_cmpistrz(a, b, sse42::_SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpistrz(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrc() { + unsafe fn test_mm_cmpistrc() { let a = str_to_m128i(b" "); let b = str_to_m128i(b" ! 
"); - let i = sse42::_mm_cmpistrc(a, b, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpistrc(a, b, _SIDD_UNIT_MASK); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrs() { + unsafe fn test_mm_cmpistrs() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b""); - let i = sse42::_mm_cmpistrs(a, b, sse42::_SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpistrs(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistro() { + unsafe fn test_mm_cmpistro() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a_bytes = u8x16::new( + let a_bytes = _mm_setr_epi8( 0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); #[cfg_attr(rustfmt, rustfmt_skip)] - let b_bytes = u8x16::new( + let b_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); - let a = __m128i::from(a_bytes); - let b = __m128i::from(b_bytes); - let i = sse42::_mm_cmpistro( + let a = a_bytes; + let b = b_bytes; + let i = _mm_cmpistro( a, b, - sse42::_SIDD_UWORD_OPS | sse42::_SIDD_UNIT_MASK, + _SIDD_UWORD_OPS | _SIDD_UNIT_MASK, ); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistra() { + unsafe fn test_mm_cmpistra() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello!!!!!!!!!!!"); - let i = sse42::_mm_cmpistra(a, b, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpistra(a, b, _SIDD_UNIT_MASK); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrm() { + unsafe fn test_mm_cmpestrm() { let a = str_to_m128i(b"Hello!"); let b = str_to_m128i(b"Hello."); - let i = sse42::_mm_cmpestrm(a, 5, b, 5, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpestrm(a, 5, b, 5, _SIDD_UNIT_MASK); #[cfg_attr(rustfmt, rustfmt_skip)] - let r = u8x16::new( - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, + let r = _mm_setr_epi8( + !0, !0, !0, !0, !0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ); - assert_eq!(i, __m128i::from(r)); + assert_eq!(i, 
r); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestri() { + unsafe fn test_mm_cmpestri() { let a = str_to_m128i(b"bar - garbage"); let b = str_to_m128i(b"foobar"); let i = - sse42::_mm_cmpestri(a, 3, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED); + _mm_cmpestri(a, 3, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(3, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrz() { + unsafe fn test_mm_cmpestrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); let i = - sse42::_mm_cmpestrz(a, 16, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED); + _mm_cmpestrz(a, 16, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrc() { + unsafe fn test_mm_cmpestrc() { let va = str_to_m128i(b"!!!!!!!!"); let vb = str_to_m128i(b" "); - let i = sse42::_mm_cmpestrc(va, 7, vb, 7, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpestrc(va, 7, vb, 7, _SIDD_UNIT_MASK); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrs() { + unsafe fn test_mm_cmpestrs() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a_bytes = u8x16::new( + let a_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); - let a = __m128i::from(a_bytes); - let b = __m128i::from(u8x16::splat(0x00)); - let i = sse42::_mm_cmpestrs(a, 8, b, 0, sse42::_SIDD_UWORD_OPS); + let a = a_bytes; + let b = _mm_set1_epi8(0x00); + let i = _mm_cmpestrs(a, 8, b, 0, _SIDD_UWORD_OPS); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestro() { + unsafe fn test_mm_cmpestro() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b"World"); - let i = sse42::_mm_cmpestro(a, 5, b, 5, sse42::_SIDD_UBYTE_OPS); + let i = _mm_cmpestro(a, 5, b, 5, _SIDD_UBYTE_OPS); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestra() { + unsafe fn test_mm_cmpestra() { let a = str_to_m128i(b"Cannot match a"); let b = str_to_m128i(b"Null after 14"); - let i = sse42::_mm_cmpestra( + let i = _mm_cmpestra( a, 14, b, 16, - 
sse42::_SIDD_CMP_EQUAL_EACH | sse42::_SIDD_UNIT_MASK, + _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK, ); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_crc32_u8() { + unsafe fn test_mm_crc32_u8() { let crc = 0x2aa1e72b; let v = 0x2a; - let i = sse42::_mm_crc32_u8(crc, v); + let i = _mm_crc32_u8(crc, v); assert_eq!(i, 0xf24122e4); } #[simd_test = "sse4.2"] - unsafe fn _mm_crc32_u16() { + unsafe fn test_mm_crc32_u16() { let crc = 0x8ecec3b5; let v = 0x22b; - let i = sse42::_mm_crc32_u16(crc, v); + let i = _mm_crc32_u16(crc, v); assert_eq!(i, 0x13bb2fb); } #[simd_test = "sse4.2"] - unsafe fn _mm_crc32_u32() { + unsafe fn test_mm_crc32_u32() { let crc = 0xae2912c8; let v = 0x845fed; - let i = sse42::_mm_crc32_u32(crc, v); + let i = _mm_crc32_u32(crc, v); assert_eq!(i, 0xffae2ed1); } } diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs index 38c79415a22e2..bc946db016132 100644 --- a/coresimd/src/x86/i686/mmx.rs +++ b/coresimd/src/x86/i686/mmx.rs @@ -486,117 +486,116 @@ extern "C" { #[cfg(test)] mod tests { - use v64::{__m64, i16x4, i32x2, i8x8, u16x4, u8x8}; - use x86::i686::mmx; + use x86::*; use stdsimd_test::simd_test; #[simd_test = "mmx"] - unsafe fn _mm_setzero_si64() { + unsafe fn test_mm_setzero_si64() { let r: __m64 = ::std::mem::transmute(0_i64); - assert_eq!(r, mmx::_mm_setzero_si64()); + assert_eq!(r, _mm_setzero_si64()); } #[simd_test = "mmx"] - unsafe fn _mm_add_pi8() { - let a = i8x8::new(-1, -1, 1, 1, -1, 0, 1, 0); - let b = i8x8::new(-127, 101, 99, 126, 0, -1, 0, 1); - let e = i8x8::new(-128, 100, 100, 127, -1, -1, 1, 1); - assert_eq!(e, i8x8::from(mmx::_mm_add_pi8(a.into(), b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_paddb(a.into(), b.into()))); + unsafe fn test_mm_add_pi8() { + let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0); + let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1); + let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1); + assert_eq!(e, _mm_add_pi8(a, b)); + assert_eq!(e, _m_paddb(a, b)); } 
#[simd_test = "mmx"] - unsafe fn _mm_add_pi16() { - let a = i16x4::new(-1, -1, 1, 1); - let b = i16x4::new( + unsafe fn test_mm_add_pi16() { + let a = _mm_setr_pi16(-1, -1, 1, 1); + let b = _mm_setr_pi16( i16::min_value() + 1, 30001, -30001, i16::max_value() - 1, ); - let e = i16x4::new(i16::min_value(), 30000, -30000, i16::max_value()); - assert_eq!(e, i16x4::from(mmx::_mm_add_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_paddw(a.into(), b.into()))); + let e = _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value()); + assert_eq!(e, _mm_add_pi16(a, b)); + assert_eq!(e, _m_paddw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_add_pi32() { - let a = i32x2::new(1, -1); - let b = i32x2::new(i32::max_value() - 1, i32::min_value() + 1); - let e = i32x2::new(i32::max_value(), i32::min_value()); - assert_eq!(e, i32x2::from(mmx::_mm_add_pi32(a.into(), b.into()))); - assert_eq!(e, i32x2::from(mmx::_m_paddd(a.into(), b.into()))); + unsafe fn test_mm_add_pi32() { + let a = _mm_setr_pi32(1, -1); + let b = _mm_setr_pi32(i32::max_value() - 1, i32::min_value() + 1); + let e = _mm_setr_pi32(i32::max_value(), i32::min_value()); + assert_eq!(e, _mm_add_pi32(a, b)); + assert_eq!(e, _m_paddd(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pi8() { - let a = i8x8::new(-100, -1, 1, 100, -1, 0, 1, 0); - let b = i8x8::new(-100, 1, -1, 100, 0, -1, 0, 1); + unsafe fn test_mm_adds_pi8() { + let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0); + let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1); let e = - i8x8::new(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1); - assert_eq!(e, i8x8::from(mmx::_mm_adds_pi8(a.into(), b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_paddsb(a.into(), b.into()))); + _mm_setr_pi8(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1); + assert_eq!(e, _mm_adds_pi8(a, b)); + assert_eq!(e, _m_paddsb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pi16() { - let a = i16x4::new(-32000, 32000, 4, 0); - let b = i16x4::new(-32000, 
32000, -5, 1); - let e = i16x4::new(i16::min_value(), i16::max_value(), -1, 1); - assert_eq!(e, i16x4::from(mmx::_mm_adds_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_paddsw(a.into(), b.into()))); + unsafe fn test_mm_adds_pi16() { + let a = _mm_setr_pi16(-32000, 32000, 4, 0); + let b = _mm_setr_pi16(-32000, 32000, -5, 1); + let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), -1, 1); + assert_eq!(e, _mm_adds_pi16(a, b)); + assert_eq!(e, _m_paddsw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pu8() { - let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 200); - let b = u8x8::new(0, 10, 20, 30, 40, 50, 60, 200); - let e = u8x8::new(0, 11, 22, 33, 44, 55, 66, u8::max_value()); - assert_eq!(e, u8x8::from(mmx::_mm_adds_pu8(a.into(), b.into()))); - assert_eq!(e, u8x8::from(mmx::_m_paddusb(a.into(), b.into()))); + unsafe fn test_mm_adds_pu8() { + let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8); + let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8); + let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::max_value() as i8); + assert_eq!(e, _mm_adds_pu8(a, b)); + assert_eq!(e, _m_paddusb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pu16() { - let a = u16x4::new(0, 1, 2, 60000); - let b = u16x4::new(0, 10, 20, 60000); - let e = u16x4::new(0, 11, 22, u16::max_value()); - assert_eq!(e, u16x4::from(mmx::_mm_adds_pu16(a.into(), b.into()))); - assert_eq!(e, u16x4::from(mmx::_m_paddusw(a.into(), b.into()))); + unsafe fn test_mm_adds_pu16() { + let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16); + let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16); + let e = _mm_setr_pi16(0, 11, 22, u16::max_value() as i16); + assert_eq!(e, _mm_adds_pu16(a, b)); + assert_eq!(e, _m_paddusw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_sub_pi8() { - let a = i8x8::new(0, 0, 1, 1, -1, -1, 0, 0); - let b = i8x8::new(-1, 1, -2, 2, 100, -100, -127, 127); - let e = i8x8::new(1, -1, 3, -1, -101, 99, 127, -127); - assert_eq!(e, i8x8::from(mmx::_mm_sub_pi8(a.into(), 
b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_psubb(a.into(), b.into()))); + unsafe fn test_mm_sub_pi8() { + let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0); + let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127); + let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127); + assert_eq!(e, _mm_sub_pi8(a, b)); + assert_eq!(e, _m_psubb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_sub_pi16() { - let a = i16x4::new(-20000, -20000, 20000, 30000); - let b = i16x4::new(-10000, 10000, -10000, 30000); - let e = i16x4::new(-10000, -30000, 30000, 0); - assert_eq!(e, i16x4::from(mmx::_mm_sub_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_psubw(a.into(), b.into()))); + unsafe fn test_mm_sub_pi16() { + let a = _mm_setr_pi16(-20000, -20000, 20000, 30000); + let b = _mm_setr_pi16(-10000, 10000, -10000, 30000); + let e = _mm_setr_pi16(-10000, -30000, 30000, 0); + assert_eq!(e, _mm_sub_pi16(a, b)); + assert_eq!(e, _m_psubw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_sub_pi32() { - let a = i32x2::new(500_000, -500_000); - let b = i32x2::new(500_000, 500_000); - let e = i32x2::new(0, -1_000_000); - assert_eq!(e, i32x2::from(mmx::_mm_sub_pi32(a.into(), b.into()))); - assert_eq!(e, i32x2::from(mmx::_m_psubd(a.into(), b.into()))); + unsafe fn test_mm_sub_pi32() { + let a = _mm_setr_pi32(500_000, -500_000); + let b = _mm_setr_pi32(500_000, 500_000); + let e = _mm_setr_pi32(0, -1_000_000); + assert_eq!(e, _mm_sub_pi32(a, b)); + assert_eq!(e, _m_psubd(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pi8() { - let a = i8x8::new(-100, 100, 0, 0, 0, 0, -5, 5); - let b = i8x8::new(100, -100, i8::min_value(), 127, -1, 1, 3, -3); - let e = i8x8::new( + unsafe fn test_mm_subs_pi8() { + let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5); + let b = _mm_setr_pi8(100, -100, i8::min_value(), 127, -1, 1, 3, -3); + let e = _mm_setr_pi8( i8::min_value(), i8::max_value(), i8::max_value(), @@ -606,128 +605,128 @@ mod tests { -8, 8, ); - assert_eq!(e, 
i8x8::from(mmx::_mm_subs_pi8(a.into(), b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_psubsb(a.into(), b.into()))); + assert_eq!(e, _mm_subs_pi8(a, b)); + assert_eq!(e, _m_psubsb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pi16() { - let a = i16x4::new(-20000, 20000, 0, 0); - let b = i16x4::new(20000, -20000, -1, 1); - let e = i16x4::new(i16::min_value(), i16::max_value(), 1, -1); - assert_eq!(e, i16x4::from(mmx::_mm_subs_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_psubsw(a.into(), b.into()))); + unsafe fn test_mm_subs_pi16() { + let a = _mm_setr_pi16(-20000, 20000, 0, 0); + let b = _mm_setr_pi16(20000, -20000, -1, 1); + let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), 1, -1); + assert_eq!(e, _mm_subs_pi16(a, b)); + assert_eq!(e, _m_psubsw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pu8() { - let a = u8x8::new(50, 10, 20, 30, 40, 60, 70, 80); - let b = u8x8::new(60, 20, 30, 40, 30, 20, 10, 0); - let e = u8x8::new(0, 0, 0, 0, 10, 40, 60, 80); - assert_eq!(e, u8x8::from(mmx::_mm_subs_pu8(a.into(), b.into()))); - assert_eq!(e, u8x8::from(mmx::_m_psubusb(a.into(), b.into()))); + unsafe fn test_mm_subs_pu8() { + let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80); + let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0); + let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80); + assert_eq!(e, _mm_subs_pu8(a, b)); + assert_eq!(e, _m_psubusb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pu16() { - let a = u16x4::new(10000, 200, 0, 44444); - let b = u16x4::new(20000, 300, 1, 11111); - let e = u16x4::new(0, 0, 0, 33333); - assert_eq!(e, u16x4::from(mmx::_mm_subs_pu16(a.into(), b.into()))); - assert_eq!(e, u16x4::from(mmx::_m_psubusw(a.into(), b.into()))); + unsafe fn test_mm_subs_pu16() { + let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16); + let b = _mm_setr_pi16(20000, 300, 1, 11111); + let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16); + assert_eq!(e, _mm_subs_pu16(a, b)); + assert_eq!(e, _m_psubusw(a, b)); } #[simd_test = 
"mmx"] - unsafe fn _mm_packs_pi16() { - let a = i16x4::new(-1, 2, -3, 4); - let b = i16x4::new(-5, 6, -7, 8); - let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8); - assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into()))); + unsafe fn test_mm_packs_pi16() { + let a = _mm_setr_pi16(-1, 2, -3, 4); + let b = _mm_setr_pi16(-5, 6, -7, 8); + let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8); + assert_eq!(r, _mm_packs_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_packs_pi32() { - let a = i32x2::new(-1, 2); - let b = i32x2::new(-5, 6); - let r = i16x4::new(-1, 2, -5, 6); - assert_eq!(r, i16x4::from(mmx::_mm_packs_pi32(a.into(), b.into()))); + unsafe fn test_mm_packs_pi32() { + let a = _mm_setr_pi32(-1, 2); + let b = _mm_setr_pi32(-5, 6); + let r = _mm_setr_pi16(-1, 2, -5, 6); + assert_eq!(r, _mm_packs_pi32(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_cmpgt_pi8() { - let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1); - assert_eq!(r, i8x8::from(mmx::_mm_cmpgt_pi8(a.into(), b.into()))); + unsafe fn test_mm_cmpgt_pi8() { + let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1); + let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1); + assert_eq!(r, _mm_cmpgt_pi8(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_cmpgt_pi16() { - let a = i16x4::new(0, 1, 2, 3); - let b = i16x4::new(4, 3, 2, 1); - let r = i16x4::new(0, 0, 0, -1); - assert_eq!(r, i16x4::from(mmx::_mm_cmpgt_pi16(a.into(), b.into()))); + unsafe fn test_mm_cmpgt_pi16() { + let a = _mm_setr_pi16(0, 1, 2, 3); + let b = _mm_setr_pi16(4, 3, 2, 1); + let r = _mm_setr_pi16(0, 0, 0, -1); + assert_eq!(r, _mm_cmpgt_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_cmpgt_pi32() { - let a = i32x2::new(0, 3); - let b = i32x2::new(1, 2); - let r0 = i32x2::new(0, -1); - let r1 = i32x2::new(-1, 0); - - assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into()); - assert_eq!(r1, 
mmx::_mm_cmpgt_pi32(b.into(), a.into()).into()); + unsafe fn test_mm_cmpgt_pi32() { + let a = _mm_setr_pi32(0, 3); + let b = _mm_setr_pi32(1, 2); + let r0 = _mm_setr_pi32(0, -1); + let r1 = _mm_setr_pi32(-1, 0); + + assert_eq!(r0, _mm_cmpgt_pi32(a, b)); + assert_eq!(r1, _mm_cmpgt_pi32(b, a)); } #[simd_test = "mmx"] - unsafe fn _mm_unpackhi_pi8() { - let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15); - let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14); - let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14); + unsafe fn test_mm_unpackhi_pi8() { + let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15); + let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14); + let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14); - assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into()); + assert_eq!(r, _mm_unpackhi_pi8(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpacklo_pi8() { - let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11); - assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into()))); + unsafe fn test_mm_unpacklo_pi8() { + let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11); + assert_eq!(r, _mm_unpacklo_pi8(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpackhi_pi16() { - let a = i16x4::new(0, 1, 2, 3); - let b = i16x4::new(4, 5, 6, 7); - let r = i16x4::new(2, 6, 3, 7); - assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into()))); + unsafe fn test_mm_unpackhi_pi16() { + let a = _mm_setr_pi16(0, 1, 2, 3); + let b = _mm_setr_pi16(4, 5, 6, 7); + let r = _mm_setr_pi16(2, 6, 3, 7); + assert_eq!(r, _mm_unpackhi_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpacklo_pi16() { - let a = i16x4::new(0, 1, 2, 3); - let b = i16x4::new(4, 5, 6, 7); - let r = i16x4::new(0, 4, 1, 5); - assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into()))); + unsafe fn test_mm_unpacklo_pi16() { + let 
a = _mm_setr_pi16(0, 1, 2, 3); + let b = _mm_setr_pi16(4, 5, 6, 7); + let r = _mm_setr_pi16(0, 4, 1, 5); + assert_eq!(r, _mm_unpacklo_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpackhi_pi32() { - let a = i32x2::new(0, 3); - let b = i32x2::new(1, 2); - let r = i32x2::new(3, 2); + unsafe fn test_mm_unpackhi_pi32() { + let a = _mm_setr_pi32(0, 3); + let b = _mm_setr_pi32(1, 2); + let r = _mm_setr_pi32(3, 2); - assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into()); + assert_eq!(r, _mm_unpackhi_pi32(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpacklo_pi32() { - let a = i32x2::new(0, 3); - let b = i32x2::new(1, 2); - let r = i32x2::new(0, 1); + unsafe fn test_mm_unpacklo_pi32() { + let a = _mm_setr_pi32(0, 3); + let b = _mm_setr_pi32(1, 2); + let r = _mm_setr_pi32(0, 1); - assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into()); + assert_eq!(r, _mm_unpacklo_pi32(a, b)); } } diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs index 573d067bdefe8..e609150270667 100644 --- a/coresimd/src/x86/i686/ssse3.rs +++ b/coresimd/src/x86/i686/ssse3.rs @@ -222,144 +222,139 @@ extern "C" { mod tests { use stdsimd_test::simd_test; - use v64::*; - use x86::i686::ssse3; + use x86::*; #[simd_test = "ssse3"] - unsafe fn _mm_abs_pi8() { - let r = u8x8::from(ssse3::_mm_abs_pi8(i8x8::splat(-5).into())); - assert_eq!(r, u8x8::splat(5)); + unsafe fn test_mm_abs_pi8() { + let r = _mm_abs_pi8(_mm_set1_pi8(-5)); + assert_eq!(r, _mm_set1_pi8(5)); } #[simd_test = "ssse3"] - unsafe fn _mm_abs_pi16() { - let r = u16x4::from(ssse3::_mm_abs_pi16(i16x4::splat(-5).into())); - assert_eq!(r, u16x4::splat(5)); + unsafe fn test_mm_abs_pi16() { + let r = _mm_abs_pi16(_mm_set1_pi16(-5)); + assert_eq!(r, _mm_set1_pi16(5)); } #[simd_test = "ssse3"] - unsafe fn _mm_abs_pi32() { - let r = u32x2::from(ssse3::_mm_abs_pi32(i32x2::splat(-5).into())); - assert_eq!(r, u32x2::splat(5)); + unsafe fn test_mm_abs_pi32() { + let r = _mm_abs_pi32(_mm_set1_pi32(-5)); + 
assert_eq!(r, _mm_set1_pi32(5)); } #[simd_test = "ssse3"] - unsafe fn _mm_shuffle_pi8() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = u8x8::new(4, 128, 4, 3, 24, 12, 6, 19); - let expected = u8x8::new(5, 0, 5, 4, 1, 5, 7, 4); - let r = u8x8::from(ssse3::_mm_shuffle_pi8(a.into(), b.into())); + unsafe fn test_mm_shuffle_pi8() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4); + let r = _mm_shuffle_pi8(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_alignr_pi8() { - let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32); - let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32); - let r = ssse3::_mm_alignr_pi8( - u8x8::from(a).into(), - u8x8::from(b).into(), - 4, - ); + unsafe fn test_mm_alignr_pi8() { + let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32); + let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32); + let r = _mm_alignr_pi8(a, b, 4); assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64)); } #[simd_test = "ssse3"] - unsafe fn _mm_hadd_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 128, 4, 3); - let expected = i16x4::new(3, 7, 132, 7); - let r = i16x4::from(ssse3::_mm_hadd_pi16(a.into(), b.into())); + unsafe fn test_mm_hadd_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(3, 7, 132, 7); + let r = _mm_hadd_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hadd_pi32() { - let a = i32x2::new(1, 2); - let b = i32x2::new(4, 128); - let expected = i32x2::new(3, 132); - let r = i32x2::from(ssse3::_mm_hadd_pi32(a.into(), b.into())); + unsafe fn test_mm_hadd_pi32() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(4, 128); + let expected = _mm_setr_pi32(3, 132); + let r = _mm_hadd_pi32(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hadds_pi16() { 
- let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(32767, 1, -32768, -1); - let expected = i16x4::new(3, 7, 32767, -32768); - let r = i16x4::from(ssse3::_mm_hadds_pi16(a.into(), b.into())); + unsafe fn test_mm_hadds_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(32767, 1, -32768, -1); + let expected = _mm_setr_pi16(3, 7, 32767, -32768); + let r = _mm_hadds_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hsub_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 128, 4, 3); - let expected = i16x4::new(-1, -1, -124, 1); - let r = i16x4::from(ssse3::_mm_hsub_pi16(a.into(), b.into())); + unsafe fn test_mm_hsub_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(-1, -1, -124, 1); + let r = _mm_hsub_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hsub_pi32() { - let a = i32x2::new(1, 2); - let b = i32x2::new(4, 128); - let expected = i32x2::new(-1, -124); - let r = i32x2::from(ssse3::_mm_hsub_pi32(a.into(), b.into())); + unsafe fn test_mm_hsub_pi32() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(4, 128); + let expected = _mm_setr_pi32(-1, -124); + let r = _mm_hsub_pi32(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hsubs_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 128, 4, 3); - let expected = i16x4::new(-1, -1, -124, 1); - let r = i16x4::from(ssse3::_mm_hsubs_pi16(a.into(), b.into())); + unsafe fn test_mm_hsubs_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(-1, -1, -124, 1); + let r = _mm_hsubs_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_maddubs_pi16() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = i8x8::new(4, 63, 4, 3, 24, 12, 6, 19); - let expected = i16x4::new(130, 24, 192, 194); - let r = i16x4::from(ssse3::_mm_maddubs_pi16(a.into(), 
b.into())); + unsafe fn test_mm_maddubs_pi16() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_pi16(130, 24, 192, 194); + let r = _mm_maddubs_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_mulhrs_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 32767, -1, -32768); - let expected = i16x4::new(0, 2, 0, -4); - let r = i16x4::from(ssse3::_mm_mulhrs_pi16(a.into(), b.into())); + unsafe fn test_mm_mulhrs_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 32767, -1, -32768); + let expected = _mm_setr_pi16(0, 2, 0, -4); + let r = _mm_mulhrs_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_sign_pi8() { - let a = i8x8::new(1, 2, 3, 4, -5, -6, 7, 8); - let b = i8x8::new(4, 64, 0, 3, 1, -1, -2, 1); - let expected = i8x8::new(1, 2, 0, 4, -5, 6, -7, 8); - let r = i8x8::from(ssse3::_mm_sign_pi8(a.into(), b.into())); + unsafe fn test_mm_sign_pi8() { + let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8); + let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1); + let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8); + let r = _mm_sign_pi8(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_sign_pi16() { - let a = i16x4::new(-1, 2, 3, 4); - let b = i16x4::new(1, -1, 1, 0); - let expected = i16x4::new(-1, -2, 3, 0); - let r = i16x4::from(ssse3::_mm_sign_pi16(a.into(), b.into())); + unsafe fn test_mm_sign_pi16() { + let a = _mm_setr_pi16(-1, 2, 3, 4); + let b = _mm_setr_pi16(1, -1, 1, 0); + let expected = _mm_setr_pi16(-1, -2, 3, 0); + let r = _mm_sign_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_sign_pi32() { - let a = i32x2::new(-1, 2); - let b = i32x2::new(1, 0); - let expected = i32x2::new(-1, 0); - let r = i32x2::from(ssse3::_mm_sign_pi32(a.into(), b.into())); + unsafe fn test_mm_sign_pi32() { + let a = _mm_setr_pi32(-1, 2); + let b = _mm_setr_pi32(1, 0); + let 
expected = _mm_setr_pi32(-1, 0); + let r = _mm_sign_pi32(a, b); assert_eq!(r, expected); } } diff --git a/examples/play.rs b/examples/play.rs deleted file mode 100644 index e4160e2bd3e3c..0000000000000 --- a/examples/play.rs +++ /dev/null @@ -1,68 +0,0 @@ -#![cfg_attr(feature = "strict", deny(warnings))] -#![feature(target_feature)] -#![cfg_attr(feature = "cargo-clippy", - allow(similar_names, missing_docs_in_private_items, - cast_sign_loss, cast_possible_truncation, - cast_possible_wrap, option_unwrap_used, use_debug, - print_stdout))] - -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -mod example { - - extern crate stdsimd; - - use std::env; - use self::stdsimd::simd as s; - use self::stdsimd::vendor; - - #[inline(never)] - #[target_feature(enable = "sse4.2")] - unsafe fn index(needle: &str, haystack: &str) -> usize { - assert!(needle.len() <= 16 && haystack.len() <= 16); - - let (needle_len, hay_len) = (needle.len(), haystack.len()); - - let mut needle = needle.to_string().into_bytes(); - needle.resize(16, 0); - let vneedle = s::__m128i::from(s::u8x16::load(&needle, 0)); - - let mut haystack = haystack.to_string().into_bytes(); - haystack.resize(16, 0); - let vhaystack = s::__m128i::from(s::u8x16::load(&haystack, 0)); - - vendor::_mm_cmpestri( - vneedle, - needle_len as i32, - vhaystack, - hay_len as i32, - vendor::_SIDD_CMP_EQUAL_ORDERED, - ) as usize - } - - pub fn main() { - // let x0: f64 = env::args().nth(1).unwrap().parse().unwrap(); - // let x1: f64 = env::args().nth(2).unwrap().parse().unwrap(); - // let x2: f64 = env::args().nth(3).unwrap().parse().unwrap(); - // let x3: f64 = env::args().nth(4).unwrap().parse().unwrap(); - // let y0: i32 = env::args().nth(5).unwrap().parse().unwrap(); - // let y1: i32 = env::args().nth(6).unwrap().parse().unwrap(); - // let y2: i32 = env::args().nth(7).unwrap().parse().unwrap(); - // let y3: i32 = env::args().nth(8).unwrap().parse().unwrap(); - - // let a = s::f64x2::new(x0, x1); - // let b = 
s::f64x2::new(x2, x3); - // let r = s::_mm_cmplt_sd(a, b); - // let r = foobar(a, b); - - let needle = env::args().nth(1).unwrap(); - let haystack = env::args().nth(2).unwrap(); - unsafe { - println!("{:?}", index(&needle, &haystack)); - } - } -} - -fn main() { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - example::main(); -}