diff --git a/coresimd/src/v128.rs b/coresimd/src/v128.rs index 870b8615705c7..21426a904a721 100644 --- a/coresimd/src/v128.rs +++ b/coresimd/src/v128.rs @@ -55,8 +55,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( i64x2, @@ -66,8 +65,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( u32x4, @@ -77,8 +75,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( i32x4, @@ -88,8 +85,7 @@ define_from!( u16x8, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( u16x8, @@ -99,8 +95,7 @@ define_from!( i32x4, i16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( i16x8, @@ -110,8 +105,7 @@ define_from!( i32x4, u16x8, u8x16, - i8x16, - __m128i + i8x16 ); define_from!( u8x16, @@ -121,22 +115,9 @@ define_from!( i32x4, u16x8, i16x8, - i8x16, - __m128i -); -define_from!( - i8x16, - u64x2, - i64x2, - u32x4, - i32x4, - u16x8, - i16x8, - u8x16, - __m128i + i8x16 ); define_from!( - __m128i, i8x16, u64x2, i64x2, diff --git a/coresimd/src/v256.rs b/coresimd/src/v256.rs index f21d7e99f5ed4..8f1993108430a 100644 --- a/coresimd/src/v256.rs +++ b/coresimd/src/v256.rs @@ -80,8 +80,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( i64x4, @@ -91,8 +90,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( u32x8, @@ -102,8 +100,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( i32x8, @@ -113,8 +110,7 @@ define_from!( u16x16, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( u16x16, @@ -124,8 +120,7 @@ define_from!( i32x8, i16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( i16x16, @@ -135,8 +130,7 @@ define_from!( i32x8, u16x16, u8x32, - i8x32, - __m256i + i8x32 ); define_from!( u8x32, @@ -146,22 +140,9 @@ define_from!( i32x8, u16x16, i16x16, - i8x32, - __m256i -); -define_from!( - i8x32, - u64x4, - i64x4, - u32x8, - i32x8, - u16x16, - i16x16, - u8x32, - __m256i + i8x32 ); 
define_from!( - __m256i, i8x32, u64x4, i64x4, diff --git a/coresimd/src/v64.rs b/coresimd/src/v64.rs index 132276c0a3e05..f125fad7ab9b1 100644 --- a/coresimd/src/v64.rs +++ b/coresimd/src/v64.rs @@ -35,13 +35,12 @@ define_ty_doc! { /// 64-bit wide integer vector type. } -define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8, __m64); -define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8, __m64); -define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8, __m64); -define_from!(i16x4, u32x2, i32x2, u16x4, u8x8, i8x8, __m64); -define_from!(u8x8, u32x2, i32x2, u16x4, i16x4, i8x8, __m64); -define_from!(i8x8, u32x2, i32x2, u16x4, i16x4, u8x8, __m64); -define_from!(__m64, i8x8, u32x2, i32x2, u16x4, i16x4, u8x8); +define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8); +define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8); +define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8); +define_from!(i16x4, u32x2, i32x2, u16x4, u8x8, i8x8); +define_from!(u8x8, u32x2, i32x2, u16x4, i16x4, i8x8); +define_from!(i8x8, u32x2, i32x2, u16x4, i16x4, u8x8); define_common_ops!(f32x2, u32x2, i32x2, u16x4, i16x4, u8x8, i8x8); define_float_ops!(f32x2); diff --git a/coresimd/src/x86/i586/sse3.rs b/coresimd/src/x86/i586/sse3.rs index 212a3395c1326..35525b3ea80fc 100644 --- a/coresimd/src/x86/i586/sse3.rs +++ b/coresimd/src/x86/i586/sse3.rs @@ -68,7 +68,7 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 { #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(lddqu))] pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i { - __m128i::from(lddqu(mem_addr as *const _)) + mem::transmute(lddqu(mem_addr as *const _)) } /// Duplicate the low double-precision (64-bit) floating-point element diff --git a/coresimd/src/x86/i586/sse42.rs b/coresimd/src/x86/i586/sse42.rs index d55f3f5645b13..81dfa53adfada 100644 --- a/coresimd/src/x86/i586/sse42.rs +++ b/coresimd/src/x86/i586/sse42.rs @@ -105,24 +105,21 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # 
#[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ORDERED}; +/// use stdsimd::vendor::*; /// /// let haystack = b"This is a long string of text data\r\n\tthat extends /// multiple lines"; /// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0"; /// -/// let a = u8x16::load(needle, 0); +/// let a = _mm_loadu_si128(needle.as_ptr() as *const _); /// let hop = 16; /// let mut indexes = Vec::new(); /// /// // Chunk the haystack into 16 byte chunks and find /// // the first "\r\n\t" in the chunk. /// for (i, chunk) in haystack.chunks(hop).enumerate() { -/// let b = u8x16::load(chunk, 0); -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ORDERED) -/// }; +/// let b = _mm_loadu_si128(chunk.as_ptr() as *const _); +/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); /// if idx != 16 { /// indexes.push((idx as usize) + (i * hop)); /// } @@ -147,21 +144,18 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # if cfg_feature_enabled!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_EQUAL_ANY}; +/// use stdsimd::vendor::*; /// /// // Ensure your input is 16 byte aligned /// let password = b"hunter2\0\0\0\0\0\0\0\0\0"; /// let special_chars = b"!@#$%^&*()[]:;<>"; /// /// // Load the input -/// let a = u8x16::load(special_chars, 0); -/// let b = u8x16::load(password, 0); +/// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _); +/// let b = _mm_loadu_si128(password.as_ptr() as *const _); /// /// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY) -/// }; +/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ANY); /// /// if idx < 16 { /// println!("Congrats! 
Your password contains a special character"); @@ -188,20 +182,18 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # if cfg_feature_enabled!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpistri, _SIDD_CMP_RANGES}; -/// # let b = u8x16::load(b":;<=>?@[\\]^_`abc", 0); +/// use stdsimd::vendor::*; +/// # let b = b":;<=>?@[\\]^_`abc"; +/// # let b = _mm_loadu_si128(b.as_ptr() as *const _); /// /// // Specify the ranges of values to be searched for [A-Za-z0-9]. -/// let a = u8x16::load(b"AZaz09\0\0\0\0\0\0\0\0\0\0", 0); +/// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0"; +/// let a = _mm_loadu_si128(a.as_ptr() as *const _); /// /// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges. /// // Which in this case will be the first alpha numeric byte found /// // in the string. -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_RANGES) -/// }; -/// +/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES); /// /// if idx < 16 { /// println!("Found an alpha numeric character"); @@ -227,23 +219,19 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { /// # if cfg_feature_enabled!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { -/// use stdsimd::simd::u16x8; -/// use stdsimd::vendor::{_mm_cmpistri}; -/// use stdsimd::vendor::{_SIDD_UWORD_OPS, _SIDD_CMP_EQUAL_EACH}; +/// use stdsimd::vendor::*; /// /// # let mut some_utf16_words = [0u16; 8]; /// # let mut more_utf16_words = [0u16; 8]; /// # '❤'.encode_utf16(&mut some_utf16_words); /// # '𝕊'.encode_utf16(&mut more_utf16_words); /// // Load the input -/// let a = u16x8::load(&some_utf16_words, 0); -/// let b = u16x8::load(&more_utf16_words, 0); +/// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _); +/// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _); /// /// // Specify _SIDD_UWORD_OPS to 
compare words instead of bytes, and /// // use _SIDD_CMP_EQUAL_EACH to compare the two strings. -/// let idx = unsafe { -/// _mm_cmpistri(a.into(), b.into(), _SIDD_UWORD_OPS | -/// _SIDD_CMP_EQUAL_EACH) }; +/// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH); /// /// if idx == 0 { /// println!("16-bit unicode strings were equal!"); @@ -367,9 +355,9 @@ pub unsafe fn _mm_cmpestrm( let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { - ($imm8:expr) => { __m128i::from(pcmpestrm128(a, la, b, lb, $imm8)) } + ($imm8:expr) => { pcmpestrm128(a, la, b, lb, $imm8) } } - constify_imm8!(imm8, call) + mem::transmute(constify_imm8!(imm8, call)) } /// Compare packed strings `a` and `b` with lengths `la` and `lb` using the @@ -415,8 +403,7 @@ pub unsafe fn _mm_cmpestrm( /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// -/// use stdsimd::simd::u8x16; -/// use stdsimd::vendor::{_mm_cmpestri, _SIDD_CMP_EQUAL_ORDERED}; +/// use stdsimd::vendor::*; /// /// // The string we want to find a substring in /// let haystack = b"Split \r\n\t line "; @@ -425,14 +412,12 @@ pub unsafe fn _mm_cmpestrm( /// // extra bytes we do not want to search for. /// let needle = b"\r\n\t ignore this "; /// -/// let a = u8x16::load(needle, 0); -/// let b = u8x16::load(haystack, 0); +/// let a = _mm_loadu_si128(needle.as_ptr() as *const _); +/// let b = _mm_loadu_si128(haystack.as_ptr() as *const _); /// /// // Note: We explicitly specify we only want to search `b` for the /// // first 3 characters of a. -/// let idx = unsafe { -/// _mm_cmpestri(a.into(), 3, b.into(), 15, _SIDD_CMP_EQUAL_ORDERED) -/// }; +/// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED); /// /// assert_eq!(idx, 6); /// # } @@ -626,8 +611,7 @@ mod tests { use stdsimd_test::simd_test; use std::ptr; - use v128::*; - use x86::i586::sse42; + use x86::*; // Currently one cannot `load` a &[u8] that is is less than 16 // in length. 
This makes loading strings less than 16 in length @@ -643,179 +627,179 @@ mod tests { slice.get_unchecked_mut(0) as *mut u8 as *mut u8, s.len(), ); - __m128i::from(u8x16::load(slice, 0)) + _mm_loadu_si128(slice.as_ptr() as *const _) } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrm() { + unsafe fn test_mm_cmpistrm() { let a = str_to_m128i(b"Hello! Good-Bye!"); let b = str_to_m128i(b"hello! good-bye!"); - let i = sse42::_mm_cmpistrm(a, b, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpistrm(a, b, _SIDD_UNIT_MASK); #[cfg_attr(rustfmt, rustfmt_skip)] - let res = u8x16::new( - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, + let res = _mm_setr_epi8( + 0x00, !0, !0, !0, !0, !0, !0, 0x00, + !0, !0, !0, !0, 0x00, !0, !0, !0, ); - assert_eq!(i, __m128i::from(res)); + assert_eq!(i, res); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistri() { + unsafe fn test_mm_cmpistri() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b" Hello "); - let i = sse42::_mm_cmpistri(a, b, sse42::_SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(3, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrz() { + unsafe fn test_mm_cmpistrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); - let i = sse42::_mm_cmpistrz(a, b, sse42::_SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpistrz(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrc() { + unsafe fn test_mm_cmpistrc() { let a = str_to_m128i(b" "); let b = str_to_m128i(b" ! 
"); - let i = sse42::_mm_cmpistrc(a, b, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpistrc(a, b, _SIDD_UNIT_MASK); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistrs() { + unsafe fn test_mm_cmpistrs() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b""); - let i = sse42::_mm_cmpistrs(a, b, sse42::_SIDD_CMP_EQUAL_ORDERED); + let i = _mm_cmpistrs(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistro() { + unsafe fn test_mm_cmpistro() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a_bytes = u8x16::new( + let a_bytes = _mm_setr_epi8( 0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); #[cfg_attr(rustfmt, rustfmt_skip)] - let b_bytes = u8x16::new( + let b_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); - let a = __m128i::from(a_bytes); - let b = __m128i::from(b_bytes); - let i = sse42::_mm_cmpistro( + let a = a_bytes; + let b = b_bytes; + let i = _mm_cmpistro( a, b, - sse42::_SIDD_UWORD_OPS | sse42::_SIDD_UNIT_MASK, + _SIDD_UWORD_OPS | _SIDD_UNIT_MASK, ); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpistra() { + unsafe fn test_mm_cmpistra() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello!!!!!!!!!!!"); - let i = sse42::_mm_cmpistra(a, b, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpistra(a, b, _SIDD_UNIT_MASK); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrm() { + unsafe fn test_mm_cmpestrm() { let a = str_to_m128i(b"Hello!"); let b = str_to_m128i(b"Hello."); - let i = sse42::_mm_cmpestrm(a, 5, b, 5, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpestrm(a, 5, b, 5, _SIDD_UNIT_MASK); #[cfg_attr(rustfmt, rustfmt_skip)] - let r = u8x16::new( - 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, + let r = _mm_setr_epi8( + !0, !0, !0, !0, !0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ); - assert_eq!(i, __m128i::from(r)); + assert_eq!(i, 
r); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestri() { + unsafe fn test_mm_cmpestri() { let a = str_to_m128i(b"bar - garbage"); let b = str_to_m128i(b"foobar"); let i = - sse42::_mm_cmpestri(a, 3, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED); + _mm_cmpestri(a, 3, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(3, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrz() { + unsafe fn test_mm_cmpestrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); let i = - sse42::_mm_cmpestrz(a, 16, b, 6, sse42::_SIDD_CMP_EQUAL_ORDERED); + _mm_cmpestrz(a, 16, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrc() { + unsafe fn test_mm_cmpestrc() { let va = str_to_m128i(b"!!!!!!!!"); let vb = str_to_m128i(b" "); - let i = sse42::_mm_cmpestrc(va, 7, vb, 7, sse42::_SIDD_UNIT_MASK); + let i = _mm_cmpestrc(va, 7, vb, 7, _SIDD_UNIT_MASK); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestrs() { + unsafe fn test_mm_cmpestrs() { #[cfg_attr(rustfmt, rustfmt_skip)] - let a_bytes = u8x16::new( + let a_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); - let a = __m128i::from(a_bytes); - let b = __m128i::from(u8x16::splat(0x00)); - let i = sse42::_mm_cmpestrs(a, 8, b, 0, sse42::_SIDD_UWORD_OPS); + let a = a_bytes; + let b = _mm_set1_epi8(0x00); + let i = _mm_cmpestrs(a, 8, b, 0, _SIDD_UWORD_OPS); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestro() { + unsafe fn test_mm_cmpestro() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b"World"); - let i = sse42::_mm_cmpestro(a, 5, b, 5, sse42::_SIDD_UBYTE_OPS); + let i = _mm_cmpestro(a, 5, b, 5, _SIDD_UBYTE_OPS); assert_eq!(0, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_cmpestra() { + unsafe fn test_mm_cmpestra() { let a = str_to_m128i(b"Cannot match a"); let b = str_to_m128i(b"Null after 14"); - let i = sse42::_mm_cmpestra( + let i = _mm_cmpestra( a, 14, b, 16, - 
sse42::_SIDD_CMP_EQUAL_EACH | sse42::_SIDD_UNIT_MASK, + _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK, ); assert_eq!(1, i); } #[simd_test = "sse4.2"] - unsafe fn _mm_crc32_u8() { + unsafe fn test_mm_crc32_u8() { let crc = 0x2aa1e72b; let v = 0x2a; - let i = sse42::_mm_crc32_u8(crc, v); + let i = _mm_crc32_u8(crc, v); assert_eq!(i, 0xf24122e4); } #[simd_test = "sse4.2"] - unsafe fn _mm_crc32_u16() { + unsafe fn test_mm_crc32_u16() { let crc = 0x8ecec3b5; let v = 0x22b; - let i = sse42::_mm_crc32_u16(crc, v); + let i = _mm_crc32_u16(crc, v); assert_eq!(i, 0x13bb2fb); } #[simd_test = "sse4.2"] - unsafe fn _mm_crc32_u32() { + unsafe fn test_mm_crc32_u32() { let crc = 0xae2912c8; let v = 0x845fed; - let i = sse42::_mm_crc32_u32(crc, v); + let i = _mm_crc32_u32(crc, v); assert_eq!(i, 0xffae2ed1); } } diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs index 38c79415a22e2..bc946db016132 100644 --- a/coresimd/src/x86/i686/mmx.rs +++ b/coresimd/src/x86/i686/mmx.rs @@ -486,117 +486,116 @@ extern "C" { #[cfg(test)] mod tests { - use v64::{__m64, i16x4, i32x2, i8x8, u16x4, u8x8}; - use x86::i686::mmx; + use x86::*; use stdsimd_test::simd_test; #[simd_test = "mmx"] - unsafe fn _mm_setzero_si64() { + unsafe fn test_mm_setzero_si64() { let r: __m64 = ::std::mem::transmute(0_i64); - assert_eq!(r, mmx::_mm_setzero_si64()); + assert_eq!(r, _mm_setzero_si64()); } #[simd_test = "mmx"] - unsafe fn _mm_add_pi8() { - let a = i8x8::new(-1, -1, 1, 1, -1, 0, 1, 0); - let b = i8x8::new(-127, 101, 99, 126, 0, -1, 0, 1); - let e = i8x8::new(-128, 100, 100, 127, -1, -1, 1, 1); - assert_eq!(e, i8x8::from(mmx::_mm_add_pi8(a.into(), b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_paddb(a.into(), b.into()))); + unsafe fn test_mm_add_pi8() { + let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0); + let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1); + let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1); + assert_eq!(e, _mm_add_pi8(a, b)); + assert_eq!(e, _m_paddb(a, b)); } 
#[simd_test = "mmx"] - unsafe fn _mm_add_pi16() { - let a = i16x4::new(-1, -1, 1, 1); - let b = i16x4::new( + unsafe fn test_mm_add_pi16() { + let a = _mm_setr_pi16(-1, -1, 1, 1); + let b = _mm_setr_pi16( i16::min_value() + 1, 30001, -30001, i16::max_value() - 1, ); - let e = i16x4::new(i16::min_value(), 30000, -30000, i16::max_value()); - assert_eq!(e, i16x4::from(mmx::_mm_add_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_paddw(a.into(), b.into()))); + let e = _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value()); + assert_eq!(e, _mm_add_pi16(a, b)); + assert_eq!(e, _m_paddw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_add_pi32() { - let a = i32x2::new(1, -1); - let b = i32x2::new(i32::max_value() - 1, i32::min_value() + 1); - let e = i32x2::new(i32::max_value(), i32::min_value()); - assert_eq!(e, i32x2::from(mmx::_mm_add_pi32(a.into(), b.into()))); - assert_eq!(e, i32x2::from(mmx::_m_paddd(a.into(), b.into()))); + unsafe fn test_mm_add_pi32() { + let a = _mm_setr_pi32(1, -1); + let b = _mm_setr_pi32(i32::max_value() - 1, i32::min_value() + 1); + let e = _mm_setr_pi32(i32::max_value(), i32::min_value()); + assert_eq!(e, _mm_add_pi32(a, b)); + assert_eq!(e, _m_paddd(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pi8() { - let a = i8x8::new(-100, -1, 1, 100, -1, 0, 1, 0); - let b = i8x8::new(-100, 1, -1, 100, 0, -1, 0, 1); + unsafe fn test_mm_adds_pi8() { + let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0); + let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1); let e = - i8x8::new(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1); - assert_eq!(e, i8x8::from(mmx::_mm_adds_pi8(a.into(), b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_paddsb(a.into(), b.into()))); + _mm_setr_pi8(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1); + assert_eq!(e, _mm_adds_pi8(a, b)); + assert_eq!(e, _m_paddsb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pi16() { - let a = i16x4::new(-32000, 32000, 4, 0); - let b = i16x4::new(-32000, 
32000, -5, 1); - let e = i16x4::new(i16::min_value(), i16::max_value(), -1, 1); - assert_eq!(e, i16x4::from(mmx::_mm_adds_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_paddsw(a.into(), b.into()))); + unsafe fn test_mm_adds_pi16() { + let a = _mm_setr_pi16(-32000, 32000, 4, 0); + let b = _mm_setr_pi16(-32000, 32000, -5, 1); + let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), -1, 1); + assert_eq!(e, _mm_adds_pi16(a, b)); + assert_eq!(e, _m_paddsw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pu8() { - let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 200); - let b = u8x8::new(0, 10, 20, 30, 40, 50, 60, 200); - let e = u8x8::new(0, 11, 22, 33, 44, 55, 66, u8::max_value()); - assert_eq!(e, u8x8::from(mmx::_mm_adds_pu8(a.into(), b.into()))); - assert_eq!(e, u8x8::from(mmx::_m_paddusb(a.into(), b.into()))); + unsafe fn test_mm_adds_pu8() { + let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8); + let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8); + let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::max_value() as i8); + assert_eq!(e, _mm_adds_pu8(a, b)); + assert_eq!(e, _m_paddusb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_adds_pu16() { - let a = u16x4::new(0, 1, 2, 60000); - let b = u16x4::new(0, 10, 20, 60000); - let e = u16x4::new(0, 11, 22, u16::max_value()); - assert_eq!(e, u16x4::from(mmx::_mm_adds_pu16(a.into(), b.into()))); - assert_eq!(e, u16x4::from(mmx::_m_paddusw(a.into(), b.into()))); + unsafe fn test_mm_adds_pu16() { + let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16); + let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16); + let e = _mm_setr_pi16(0, 11, 22, u16::max_value() as i16); + assert_eq!(e, _mm_adds_pu16(a, b)); + assert_eq!(e, _m_paddusw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_sub_pi8() { - let a = i8x8::new(0, 0, 1, 1, -1, -1, 0, 0); - let b = i8x8::new(-1, 1, -2, 2, 100, -100, -127, 127); - let e = i8x8::new(1, -1, 3, -1, -101, 99, 127, -127); - assert_eq!(e, i8x8::from(mmx::_mm_sub_pi8(a.into(), 
b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_psubb(a.into(), b.into()))); + unsafe fn test_mm_sub_pi8() { + let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0); + let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127); + let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127); + assert_eq!(e, _mm_sub_pi8(a, b)); + assert_eq!(e, _m_psubb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_sub_pi16() { - let a = i16x4::new(-20000, -20000, 20000, 30000); - let b = i16x4::new(-10000, 10000, -10000, 30000); - let e = i16x4::new(-10000, -30000, 30000, 0); - assert_eq!(e, i16x4::from(mmx::_mm_sub_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_psubw(a.into(), b.into()))); + unsafe fn test_mm_sub_pi16() { + let a = _mm_setr_pi16(-20000, -20000, 20000, 30000); + let b = _mm_setr_pi16(-10000, 10000, -10000, 30000); + let e = _mm_setr_pi16(-10000, -30000, 30000, 0); + assert_eq!(e, _mm_sub_pi16(a, b)); + assert_eq!(e, _m_psubw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_sub_pi32() { - let a = i32x2::new(500_000, -500_000); - let b = i32x2::new(500_000, 500_000); - let e = i32x2::new(0, -1_000_000); - assert_eq!(e, i32x2::from(mmx::_mm_sub_pi32(a.into(), b.into()))); - assert_eq!(e, i32x2::from(mmx::_m_psubd(a.into(), b.into()))); + unsafe fn test_mm_sub_pi32() { + let a = _mm_setr_pi32(500_000, -500_000); + let b = _mm_setr_pi32(500_000, 500_000); + let e = _mm_setr_pi32(0, -1_000_000); + assert_eq!(e, _mm_sub_pi32(a, b)); + assert_eq!(e, _m_psubd(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pi8() { - let a = i8x8::new(-100, 100, 0, 0, 0, 0, -5, 5); - let b = i8x8::new(100, -100, i8::min_value(), 127, -1, 1, 3, -3); - let e = i8x8::new( + unsafe fn test_mm_subs_pi8() { + let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5); + let b = _mm_setr_pi8(100, -100, i8::min_value(), 127, -1, 1, 3, -3); + let e = _mm_setr_pi8( i8::min_value(), i8::max_value(), i8::max_value(), @@ -606,128 +605,128 @@ mod tests { -8, 8, ); - assert_eq!(e, 
i8x8::from(mmx::_mm_subs_pi8(a.into(), b.into()))); - assert_eq!(e, i8x8::from(mmx::_m_psubsb(a.into(), b.into()))); + assert_eq!(e, _mm_subs_pi8(a, b)); + assert_eq!(e, _m_psubsb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pi16() { - let a = i16x4::new(-20000, 20000, 0, 0); - let b = i16x4::new(20000, -20000, -1, 1); - let e = i16x4::new(i16::min_value(), i16::max_value(), 1, -1); - assert_eq!(e, i16x4::from(mmx::_mm_subs_pi16(a.into(), b.into()))); - assert_eq!(e, i16x4::from(mmx::_m_psubsw(a.into(), b.into()))); + unsafe fn test_mm_subs_pi16() { + let a = _mm_setr_pi16(-20000, 20000, 0, 0); + let b = _mm_setr_pi16(20000, -20000, -1, 1); + let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), 1, -1); + assert_eq!(e, _mm_subs_pi16(a, b)); + assert_eq!(e, _m_psubsw(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pu8() { - let a = u8x8::new(50, 10, 20, 30, 40, 60, 70, 80); - let b = u8x8::new(60, 20, 30, 40, 30, 20, 10, 0); - let e = u8x8::new(0, 0, 0, 0, 10, 40, 60, 80); - assert_eq!(e, u8x8::from(mmx::_mm_subs_pu8(a.into(), b.into()))); - assert_eq!(e, u8x8::from(mmx::_m_psubusb(a.into(), b.into()))); + unsafe fn test_mm_subs_pu8() { + let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80); + let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0); + let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80); + assert_eq!(e, _mm_subs_pu8(a, b)); + assert_eq!(e, _m_psubusb(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_subs_pu16() { - let a = u16x4::new(10000, 200, 0, 44444); - let b = u16x4::new(20000, 300, 1, 11111); - let e = u16x4::new(0, 0, 0, 33333); - assert_eq!(e, u16x4::from(mmx::_mm_subs_pu16(a.into(), b.into()))); - assert_eq!(e, u16x4::from(mmx::_m_psubusw(a.into(), b.into()))); + unsafe fn test_mm_subs_pu16() { + let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16); + let b = _mm_setr_pi16(20000, 300, 1, 11111); + let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16); + assert_eq!(e, _mm_subs_pu16(a, b)); + assert_eq!(e, _m_psubusw(a, b)); } #[simd_test = 
"mmx"] - unsafe fn _mm_packs_pi16() { - let a = i16x4::new(-1, 2, -3, 4); - let b = i16x4::new(-5, 6, -7, 8); - let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8); - assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into()))); + unsafe fn test_mm_packs_pi16() { + let a = _mm_setr_pi16(-1, 2, -3, 4); + let b = _mm_setr_pi16(-5, 6, -7, 8); + let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8); + assert_eq!(r, _mm_packs_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_packs_pi32() { - let a = i32x2::new(-1, 2); - let b = i32x2::new(-5, 6); - let r = i16x4::new(-1, 2, -5, 6); - assert_eq!(r, i16x4::from(mmx::_mm_packs_pi32(a.into(), b.into()))); + unsafe fn test_mm_packs_pi32() { + let a = _mm_setr_pi32(-1, 2); + let b = _mm_setr_pi32(-5, 6); + let r = _mm_setr_pi16(-1, 2, -5, 6); + assert_eq!(r, _mm_packs_pi32(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_cmpgt_pi8() { - let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1); - assert_eq!(r, i8x8::from(mmx::_mm_cmpgt_pi8(a.into(), b.into()))); + unsafe fn test_mm_cmpgt_pi8() { + let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1); + let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1); + assert_eq!(r, _mm_cmpgt_pi8(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_cmpgt_pi16() { - let a = i16x4::new(0, 1, 2, 3); - let b = i16x4::new(4, 3, 2, 1); - let r = i16x4::new(0, 0, 0, -1); - assert_eq!(r, i16x4::from(mmx::_mm_cmpgt_pi16(a.into(), b.into()))); + unsafe fn test_mm_cmpgt_pi16() { + let a = _mm_setr_pi16(0, 1, 2, 3); + let b = _mm_setr_pi16(4, 3, 2, 1); + let r = _mm_setr_pi16(0, 0, 0, -1); + assert_eq!(r, _mm_cmpgt_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_cmpgt_pi32() { - let a = i32x2::new(0, 3); - let b = i32x2::new(1, 2); - let r0 = i32x2::new(0, -1); - let r1 = i32x2::new(-1, 0); - - assert_eq!(r0, mmx::_mm_cmpgt_pi32(a.into(), b.into()).into()); - assert_eq!(r1, 
mmx::_mm_cmpgt_pi32(b.into(), a.into()).into()); + unsafe fn test_mm_cmpgt_pi32() { + let a = _mm_setr_pi32(0, 3); + let b = _mm_setr_pi32(1, 2); + let r0 = _mm_setr_pi32(0, -1); + let r1 = _mm_setr_pi32(-1, 0); + + assert_eq!(r0, _mm_cmpgt_pi32(a, b)); + assert_eq!(r1, _mm_cmpgt_pi32(b, a)); } #[simd_test = "mmx"] - unsafe fn _mm_unpackhi_pi8() { - let a = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15); - let b = i8x8::new(1, 2, 5, 6, 9, 10, 13, 14); - let r = i8x8::new(8, 9, 11, 10, 12, 13, 15, 14); + unsafe fn test_mm_unpackhi_pi8() { + let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15); + let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14); + let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14); - assert_eq!(r, mmx::_mm_unpackhi_pi8(a.into(), b.into()).into()); + assert_eq!(r, _mm_unpackhi_pi8(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpacklo_pi8() { - let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15); - let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11); - assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into()))); + unsafe fn test_mm_unpacklo_pi8() { + let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11); + assert_eq!(r, _mm_unpacklo_pi8(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpackhi_pi16() { - let a = i16x4::new(0, 1, 2, 3); - let b = i16x4::new(4, 5, 6, 7); - let r = i16x4::new(2, 6, 3, 7); - assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into()))); + unsafe fn test_mm_unpackhi_pi16() { + let a = _mm_setr_pi16(0, 1, 2, 3); + let b = _mm_setr_pi16(4, 5, 6, 7); + let r = _mm_setr_pi16(2, 6, 3, 7); + assert_eq!(r, _mm_unpackhi_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpacklo_pi16() { - let a = i16x4::new(0, 1, 2, 3); - let b = i16x4::new(4, 5, 6, 7); - let r = i16x4::new(0, 4, 1, 5); - assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into()))); + unsafe fn test_mm_unpacklo_pi16() { + let 
a = _mm_setr_pi16(0, 1, 2, 3); + let b = _mm_setr_pi16(4, 5, 6, 7); + let r = _mm_setr_pi16(0, 4, 1, 5); + assert_eq!(r, _mm_unpacklo_pi16(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpackhi_pi32() { - let a = i32x2::new(0, 3); - let b = i32x2::new(1, 2); - let r = i32x2::new(3, 2); + unsafe fn test_mm_unpackhi_pi32() { + let a = _mm_setr_pi32(0, 3); + let b = _mm_setr_pi32(1, 2); + let r = _mm_setr_pi32(3, 2); - assert_eq!(r, mmx::_mm_unpackhi_pi32(a.into(), b.into()).into()); + assert_eq!(r, _mm_unpackhi_pi32(a, b)); } #[simd_test = "mmx"] - unsafe fn _mm_unpacklo_pi32() { - let a = i32x2::new(0, 3); - let b = i32x2::new(1, 2); - let r = i32x2::new(0, 1); + unsafe fn test_mm_unpacklo_pi32() { + let a = _mm_setr_pi32(0, 3); + let b = _mm_setr_pi32(1, 2); + let r = _mm_setr_pi32(0, 1); - assert_eq!(r, mmx::_mm_unpacklo_pi32(a.into(), b.into()).into()); + assert_eq!(r, _mm_unpacklo_pi32(a, b)); } } diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs index 573d067bdefe8..e609150270667 100644 --- a/coresimd/src/x86/i686/ssse3.rs +++ b/coresimd/src/x86/i686/ssse3.rs @@ -222,144 +222,139 @@ extern "C" { mod tests { use stdsimd_test::simd_test; - use v64::*; - use x86::i686::ssse3; + use x86::*; #[simd_test = "ssse3"] - unsafe fn _mm_abs_pi8() { - let r = u8x8::from(ssse3::_mm_abs_pi8(i8x8::splat(-5).into())); - assert_eq!(r, u8x8::splat(5)); + unsafe fn test_mm_abs_pi8() { + let r = _mm_abs_pi8(_mm_set1_pi8(-5)); + assert_eq!(r, _mm_set1_pi8(5)); } #[simd_test = "ssse3"] - unsafe fn _mm_abs_pi16() { - let r = u16x4::from(ssse3::_mm_abs_pi16(i16x4::splat(-5).into())); - assert_eq!(r, u16x4::splat(5)); + unsafe fn test_mm_abs_pi16() { + let r = _mm_abs_pi16(_mm_set1_pi16(-5)); + assert_eq!(r, _mm_set1_pi16(5)); } #[simd_test = "ssse3"] - unsafe fn _mm_abs_pi32() { - let r = u32x2::from(ssse3::_mm_abs_pi32(i32x2::splat(-5).into())); - assert_eq!(r, u32x2::splat(5)); + unsafe fn test_mm_abs_pi32() { + let r = _mm_abs_pi32(_mm_set1_pi32(-5)); + 
assert_eq!(r, _mm_set1_pi32(5)); } #[simd_test = "ssse3"] - unsafe fn _mm_shuffle_pi8() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = u8x8::new(4, 128, 4, 3, 24, 12, 6, 19); - let expected = u8x8::new(5, 0, 5, 4, 1, 5, 7, 4); - let r = u8x8::from(ssse3::_mm_shuffle_pi8(a.into(), b.into())); + unsafe fn test_mm_shuffle_pi8() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4); + let r = _mm_shuffle_pi8(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_alignr_pi8() { - let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32); - let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32); - let r = ssse3::_mm_alignr_pi8( - u8x8::from(a).into(), - u8x8::from(b).into(), - 4, - ); + unsafe fn test_mm_alignr_pi8() { + let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32); + let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32); + let r = _mm_alignr_pi8(a, b, 4); assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64)); } #[simd_test = "ssse3"] - unsafe fn _mm_hadd_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 128, 4, 3); - let expected = i16x4::new(3, 7, 132, 7); - let r = i16x4::from(ssse3::_mm_hadd_pi16(a.into(), b.into())); + unsafe fn test_mm_hadd_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(3, 7, 132, 7); + let r = _mm_hadd_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hadd_pi32() { - let a = i32x2::new(1, 2); - let b = i32x2::new(4, 128); - let expected = i32x2::new(3, 132); - let r = i32x2::from(ssse3::_mm_hadd_pi32(a.into(), b.into())); + unsafe fn test_mm_hadd_pi32() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(4, 128); + let expected = _mm_setr_pi32(3, 132); + let r = _mm_hadd_pi32(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hadds_pi16() { 
- let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(32767, 1, -32768, -1); - let expected = i16x4::new(3, 7, 32767, -32768); - let r = i16x4::from(ssse3::_mm_hadds_pi16(a.into(), b.into())); + unsafe fn test_mm_hadds_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(32767, 1, -32768, -1); + let expected = _mm_setr_pi16(3, 7, 32767, -32768); + let r = _mm_hadds_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hsub_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 128, 4, 3); - let expected = i16x4::new(-1, -1, -124, 1); - let r = i16x4::from(ssse3::_mm_hsub_pi16(a.into(), b.into())); + unsafe fn test_mm_hsub_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(-1, -1, -124, 1); + let r = _mm_hsub_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hsub_pi32() { - let a = i32x2::new(1, 2); - let b = i32x2::new(4, 128); - let expected = i32x2::new(-1, -124); - let r = i32x2::from(ssse3::_mm_hsub_pi32(a.into(), b.into())); + unsafe fn test_mm_hsub_pi32() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(4, 128); + let expected = _mm_setr_pi32(-1, -124); + let r = _mm_hsub_pi32(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_hsubs_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 128, 4, 3); - let expected = i16x4::new(-1, -1, -124, 1); - let r = i16x4::from(ssse3::_mm_hsubs_pi16(a.into(), b.into())); + unsafe fn test_mm_hsubs_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 128, 4, 3); + let expected = _mm_setr_pi16(-1, -1, -124, 1); + let r = _mm_hsubs_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_maddubs_pi16() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = i8x8::new(4, 63, 4, 3, 24, 12, 6, 19); - let expected = i16x4::new(130, 24, 192, 194); - let r = i16x4::from(ssse3::_mm_maddubs_pi16(a.into(), 
b.into())); + unsafe fn test_mm_maddubs_pi16() { + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19); + let expected = _mm_setr_pi16(130, 24, 192, 194); + let r = _mm_maddubs_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_mulhrs_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(4, 32767, -1, -32768); - let expected = i16x4::new(0, 2, 0, -4); - let r = i16x4::from(ssse3::_mm_mulhrs_pi16(a.into(), b.into())); + unsafe fn test_mm_mulhrs_pi16() { + let a = _mm_setr_pi16(1, 2, 3, 4); + let b = _mm_setr_pi16(4, 32767, -1, -32768); + let expected = _mm_setr_pi16(0, 2, 0, -4); + let r = _mm_mulhrs_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_sign_pi8() { - let a = i8x8::new(1, 2, 3, 4, -5, -6, 7, 8); - let b = i8x8::new(4, 64, 0, 3, 1, -1, -2, 1); - let expected = i8x8::new(1, 2, 0, 4, -5, 6, -7, 8); - let r = i8x8::from(ssse3::_mm_sign_pi8(a.into(), b.into())); + unsafe fn test_mm_sign_pi8() { + let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8); + let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1); + let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8); + let r = _mm_sign_pi8(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_sign_pi16() { - let a = i16x4::new(-1, 2, 3, 4); - let b = i16x4::new(1, -1, 1, 0); - let expected = i16x4::new(-1, -2, 3, 0); - let r = i16x4::from(ssse3::_mm_sign_pi16(a.into(), b.into())); + unsafe fn test_mm_sign_pi16() { + let a = _mm_setr_pi16(-1, 2, 3, 4); + let b = _mm_setr_pi16(1, -1, 1, 0); + let expected = _mm_setr_pi16(-1, -2, 3, 0); + let r = _mm_sign_pi16(a, b); assert_eq!(r, expected); } #[simd_test = "ssse3"] - unsafe fn _mm_sign_pi32() { - let a = i32x2::new(-1, 2); - let b = i32x2::new(1, 0); - let expected = i32x2::new(-1, 0); - let r = i32x2::from(ssse3::_mm_sign_pi32(a.into(), b.into())); + unsafe fn test_mm_sign_pi32() { + let a = _mm_setr_pi32(-1, 2); + let b = _mm_setr_pi32(1, 0); + let 
expected = _mm_setr_pi32(-1, 0); + let r = _mm_sign_pi32(a, b); assert_eq!(r, expected); } } diff --git a/examples/play.rs b/examples/play.rs deleted file mode 100644 index e4160e2bd3e3c..0000000000000 --- a/examples/play.rs +++ /dev/null @@ -1,68 +0,0 @@ -#![cfg_attr(feature = "strict", deny(warnings))] -#![feature(target_feature)] -#![cfg_attr(feature = "cargo-clippy", - allow(similar_names, missing_docs_in_private_items, - cast_sign_loss, cast_possible_truncation, - cast_possible_wrap, option_unwrap_used, use_debug, - print_stdout))] - -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -mod example { - - extern crate stdsimd; - - use std::env; - use self::stdsimd::simd as s; - use self::stdsimd::vendor; - - #[inline(never)] - #[target_feature(enable = "sse4.2")] - unsafe fn index(needle: &str, haystack: &str) -> usize { - assert!(needle.len() <= 16 && haystack.len() <= 16); - - let (needle_len, hay_len) = (needle.len(), haystack.len()); - - let mut needle = needle.to_string().into_bytes(); - needle.resize(16, 0); - let vneedle = s::__m128i::from(s::u8x16::load(&needle, 0)); - - let mut haystack = haystack.to_string().into_bytes(); - haystack.resize(16, 0); - let vhaystack = s::__m128i::from(s::u8x16::load(&haystack, 0)); - - vendor::_mm_cmpestri( - vneedle, - needle_len as i32, - vhaystack, - hay_len as i32, - vendor::_SIDD_CMP_EQUAL_ORDERED, - ) as usize - } - - pub fn main() { - // let x0: f64 = env::args().nth(1).unwrap().parse().unwrap(); - // let x1: f64 = env::args().nth(2).unwrap().parse().unwrap(); - // let x2: f64 = env::args().nth(3).unwrap().parse().unwrap(); - // let x3: f64 = env::args().nth(4).unwrap().parse().unwrap(); - // let y0: i32 = env::args().nth(5).unwrap().parse().unwrap(); - // let y1: i32 = env::args().nth(6).unwrap().parse().unwrap(); - // let y2: i32 = env::args().nth(7).unwrap().parse().unwrap(); - // let y3: i32 = env::args().nth(8).unwrap().parse().unwrap(); - - // let a = s::f64x2::new(x0, x1); - // let b = 
s::f64x2::new(x2, x3); - // let r = s::_mm_cmplt_sd(a, b); - // let r = foobar(a, b); - - let needle = env::args().nth(1).unwrap(); - let haystack = env::args().nth(2).unwrap(); - unsafe { - println!("{:?}", index(&needle, &haystack)); - } - } -} - -fn main() { - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - example::main(); -}