diff --git a/coresimd/src/lib.rs b/coresimd/src/lib.rs index ade8eb3130096..a3c02fcaffc72 100644 --- a/coresimd/src/lib.rs +++ b/coresimd/src/lib.rs @@ -15,7 +15,7 @@ simd_ffi, target_feature, cfg_target_feature, i128_type, asm, const_atomic_usize_new, stmt_expr_attributes, core_intrinsics, crate_in_paths)] -#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))] +#![cfg_attr(test, feature(proc_macro, test, attr_literals))] #![cfg_attr(feature = "cargo-clippy", allow(inline_always, too_many_arguments, cast_sign_loss, cast_lossless, cast_possible_wrap, diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs index 63764b2300e5b..1cb830eff8831 100644 --- a/coresimd/src/x86/i686/sse.rs +++ b/coresimd/src/x86/i686/sse.rs @@ -44,8 +44,8 @@ extern "C" { /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// greatest value into the result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxsw))] pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 { pmaxsw(a, b) @@ -53,8 +53,8 @@ pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 { /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// greatest value into the result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxsw))] pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 { _mm_max_pi16(a, b) @@ -62,8 +62,8 @@ pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 { /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// greatest value into the result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxub))] pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 { pmaxub(a, b) @@ -71,8 +71,8 @@ pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 { /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// greatest value into the result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxub))] pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { _mm_max_pu8(a, b) @@ -80,8 +80,8 @@ pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// smallest value into the result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminsw))] pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 { pminsw(a, b) @@ -89,8 +89,8 @@ pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 { /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// smallest value into the result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminsw))] pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 { _mm_min_pi16(a, b) @@ -98,8 +98,8 @@ pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 { /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// smallest value into the result. 
-#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminub))] pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 { pminub(a, b) @@ -107,8 +107,8 @@ pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 { /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// smallest value into the result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminub))] pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { _mm_min_pu8(a, b) @@ -117,8 +117,8 @@ pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { /// Multiplies packed 16-bit unsigned integer values and writes the /// high-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmulhuw))] pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 { pmulhuw(a, b) @@ -127,8 +127,8 @@ pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 { /// Multiplies packed 16-bit unsigned integer values and writes the /// high-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmulhuw))] pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 { _mm_mulhi_pu16(a, b) @@ -137,8 +137,8 @@ pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 { /// Computes the rounded averages of the packed unsigned 8-bit integer /// values and writes the averages to the corresponding bits in the /// destination. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgb))] pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 { pavgb(a, b) @@ -147,8 +147,8 @@ pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 { /// Computes the rounded averages of the packed unsigned 8-bit integer /// values and writes the averages to the corresponding bits in the /// destination. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgb))] pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { _mm_avg_pu8(a, b) @@ -157,8 +157,8 @@ pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { /// Computes the rounded averages of the packed unsigned 16-bit integer /// values and writes the averages to the corresponding bits in the /// destination. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgw))] pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 { pavgw(a, b) @@ -167,8 +167,8 @@ pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 { /// Computes the rounded averages of the packed unsigned 16-bit integer /// values and writes the averages to the corresponding bits in the /// destination. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgw))] pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 { _mm_avg_pu16(a, b) @@ -178,8 +178,8 @@ pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 { /// 64-bit vector operands and computes the absolute value for each of the /// differences. 
Then the sum of the 8 absolute differences is written to the /// bits [15:0] of the destination; the remaining bits [63:16] are cleared. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(psadbw))] pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 { psadbw(a, b) @@ -189,8 +189,8 @@ pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 { /// 64-bit vector operands and computes the absolute value for each of the /// differences. Then the sum of the 8 absolute differences is written to the /// bits [15:0] of the destination; the remaining bits [63:16] are cleared. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(psadbw))] pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { _mm_sad_pu8(a, b) @@ -200,8 +200,8 @@ pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { cvtpi2ps(a, mem::transmute(b)) @@ -211,16 +211,16 @@ pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 { _mm_cvtpi32_ps(a, b) } /// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); @@ -230,8 +230,8 @@ pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 { } /// Converts the lower 4 unsigned 8-bit values of `a` into a 128-bit vector of 4 `f32`s. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); @@ -240,8 +240,8 @@ pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 { } /// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); @@ -255,8 +255,8 @@ pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 { } /// Converts a 64-bit vector of `u16`s into a 128-bit vector of 4 `f32`s. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); @@ -270,8 +270,8 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 { /// Converts the two 32-bit signed integer values from each 64-bit vector /// operand of [2 x i32] into a 128-bit vector of [4 x float]. 
-#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 { let c = i586::_mm_setzero_ps(); @@ -287,8 +287,8 @@ pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 { /// /// To minimize caching, the data is flagged as non-temporal /// (unlikely to be used again soon). -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(maskmovq))] pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) { maskmovq(a, mask, mem_addr) @@ -301,8 +301,8 @@ pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) { /// /// To minimize caching, the data is flagged as non-temporal /// (unlikely to be used again soon). -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(maskmovq))] pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { _mm_maskmove_si64(a, mask, mem_addr) @@ -310,8 +310,8 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and /// returns it, as specified by the immediate integer operand. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 { macro_rules! call { @@ -322,8 +322,8 @@ pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 { /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and /// returns it, as specified by the immediate integer operand. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 { _mm_extract_pi16(a, imm2) @@ -332,8 +332,8 @@ pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 { /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset /// specified by the immediate operand `n`. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 { macro_rules! call { @@ -345,8 +345,8 @@ pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 { /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset /// specified by the immediate operand `n`. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { _mm_insert_pi16(a, d, imm2) @@ -355,8 +355,8 @@ pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { /// Takes the most significant bit from each 8-bit element in a 64-bit /// integer vector to create a 16-bit mask value. Zero-extends the value to /// 32-bit integer and writes it to the destination. 
-#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmovmskb))] pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { pmovmskb(mem::transmute(a)) @@ -365,8 +365,8 @@ pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { /// Takes the most significant bit from each 8-bit element in a 64-bit /// integer vector to create a 16-bit mask value. Zero-extends the value to /// 32-bit integer and writes it to the destination. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmovmskb))] pub unsafe fn _m_pmovmskb(a: __m64) -> i32 { _mm_movemask_pi8(a) @@ -374,8 +374,8 @@ pub unsafe fn _m_pmovmskb(a: __m64) -> i32 { /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the /// destination, as specified by the immediate value operand. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 { macro_rules! call { @@ -386,8 +386,8 @@ pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 { /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the /// destination, as specified by the immediate value operand. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { _mm_shuffle_pi16(a, imm8) @@ -395,8 +395,8 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { /// Convert the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvttps2pi))] pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { mem::transmute(cvttps2pi(a)) @@ -404,8 +404,8 @@ pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { /// Convert the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvttps2pi))] pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { _mm_cvttps_pi32(a) @@ -413,8 +413,8 @@ pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { /// Convert the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { cvtps2pi(a) @@ -422,8 +422,8 @@ pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { /// Convert the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { _mm_cvtps_pi32(a) @@ -431,8 +431,8 @@ pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { /// Convert packed single-precision (32-bit) floating-point elements in `a` to /// packed 16-bit integers. 
-#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { let b = _mm_cvtps_pi32(a); @@ -444,8 +444,8 @@ pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { /// Convert packed single-precision (32-bit) floating-point elements in `a` to /// packed 8-bit integers, and returns them in the lower 4 elements of the /// result. -#[inline(always)] -#[target_feature(enable = "sse")] +#[inline] +#[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { let b = _mm_cvtps_pi16(a); @@ -458,7 +458,7 @@ mod tests { use x86::*; use stdsimd_test::simd_test; - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_max_pi16() { let a = _mm_setr_pi16(-1, 6, -3, 8); let b = _mm_setr_pi16(5, -2, 7, -4); @@ -468,7 +468,7 @@ mod tests { assert_eq_m64(r, _m_pmaxsw(a, b)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_max_pu8() { let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); @@ -478,7 +478,7 @@ mod tests { assert_eq_m64(r, _m_pmaxub(a, b)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_min_pi16() { let a = _mm_setr_pi16(-1, 6, -3, 8); let b = _mm_setr_pi16(5, -2, 7, -4); @@ -488,7 +488,7 @@ mod tests { assert_eq_m64(r, _m_pminsw(a, b)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_min_pu8() { let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); @@ -498,21 +498,21 @@ mod tests { assert_eq_m64(r, _m_pminub(a, b)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_mulhi_pu16() { let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); let r = _mm_mulhi_pu16(a, b); assert_eq_m64(r, _mm_set1_pi16(15)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_m_pmulhuw() { let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); let r = _m_pmulhuw(a, b); assert_eq_m64(r, _mm_set1_pi16(15)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_avg_pu8() { let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9)); let r = _mm_avg_pu8(a, b); @@ -522,7 +522,7 @@ mod tests { assert_eq_m64(r, _mm_set1_pi8(6)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_avg_pu16() { let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9)); let r = _mm_avg_pu16(a, b); @@ -532,7 +532,7 @@ mod tests { assert_eq_m64(r, _mm_set1_pi16(6)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_sad_pu8() { let a = _mm_setr_pi8(255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, 1, 2, 3, 4); @@ -544,7 +544,7 @@ mod tests { assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtpi32_ps() { let a = _mm_setr_ps(0., 0., 3., 4.); let b = _mm_setr_pi32(1, 2); @@ -556,7 +556,7 @@ mod tests { assert_eq_m128(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtpi16_ps() { let a = _mm_setr_pi16(1, 2, 3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); @@ -564,7 +564,7 @@ mod tests { assert_eq_m128(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtpu16_ps() { let a = _mm_setr_pi16(1, 2, 3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); @@ -572,7 +572,7 @@ mod tests { assert_eq_m128(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtpi8_ps() { let a = 
_mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let expected = _mm_setr_ps(1., 2., 3., 4.); @@ -580,7 +580,7 @@ mod tests { assert_eq_m128(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtpu8_ps() { let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let expected = _mm_setr_ps(1., 2., 3., 4.); @@ -588,7 +588,7 @@ mod tests { assert_eq_m128(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtpi32x2_ps() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(3, 4); @@ -597,7 +597,7 @@ mod tests { assert_eq_m128(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_maskmove_si64() { let a = _mm_set1_pi8(9); let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0); @@ -615,7 +615,7 @@ mod tests { assert_eq_m64(r, e); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_extract_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let r = _mm_extract_pi16(a, 0); @@ -627,7 +627,7 @@ mod tests { assert_eq!(r, 2); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_insert_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let r = _mm_insert_pi16(a, 0, 0b0); @@ -641,7 +641,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_movemask_pi8() { let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); let r = _mm_movemask_pi8(a); @@ -651,7 +651,7 @@ mod tests { assert_eq!(r, 0b10001); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_shuffle_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let r = _mm_shuffle_pi16(a, 0b00_01_01_11); @@ -662,7 +662,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtps_pi32() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi32(1, 2); @@ -671,7 +671,7 @@ mod tests { assert_eq_m64(r, _mm_cvt_ps2pi(a)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvttps_pi32() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi32(7, 2); @@ -680,14 +680,14 @@ mod tests { assert_eq_m64(r, _mm_cvtt_ps2pi(a)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtps_pi16() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi16(7, 2, 3, 4); assert_eq_m64(r, _mm_cvtps_pi16(a)); } - #[simd_test = "sse"] + #[simd_test = "sse,mmx"] unsafe fn test_mm_cvtps_pi8() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0); diff --git a/coresimd/src/x86/i686/sse2.rs b/coresimd/src/x86/i686/sse2.rs index 075be5b868f3e..f82c4372a9630 100644 --- a/coresimd/src/x86/i686/sse2.rs +++ b/coresimd/src/x86/i686/sse2.rs @@ -11,7 +11,7 @@ use stdsimd_test::assert_instr; /// Adds two signed or unsigned 64-bit integer values, returning the /// lower 64 bits of the sum. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(paddq))] pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { paddq(a, b) @@ -21,7 +21,7 @@ pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { /// of the two 64-bit integer vectors and returns the 64-bit unsigned /// product. 
#[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(pmuludq))] pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { pmuludq(mem::transmute(a), mem::transmute(b)) @@ -30,7 +30,7 @@ pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { /// Subtracts signed or unsigned 64-bit integer values and writes the /// difference to the corresponding bits in the destination. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(psubq))] pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 { psubq(a, b) @@ -40,7 +40,7 @@ pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 { /// [2 x i32] into two double-precision floating-point values, returned in a /// 128-bit vector of [2 x double]. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(cvtpi2pd))] pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d { cvtpi2pd(a) @@ -49,7 +49,7 @@ pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d { /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with /// the specified 64-bit integer values. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] // no particular instruction to test pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { _mm_set_epi64x(mem::transmute(e1), mem::transmute(e0)) @@ -58,7 +58,7 @@ pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { /// Initializes both values in a 128-bit vector of [2 x i64] with the /// specified 64-bit value. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] // no particular instruction to test pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { _mm_set_epi64x(mem::transmute(a), mem::transmute(a)) @@ -67,7 +67,7 @@ pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] // no particular instruction to test pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i { _mm_set_epi64x(mem::transmute(e0), mem::transmute(e1)) @@ -76,7 +76,7 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i { /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit /// integer. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] // #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong // instr? pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 { @@ -86,7 +86,7 @@ pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 { /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the /// upper bits. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] // #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong // instr? pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i { @@ -97,7 +97,7 @@ pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i { /// 128-bit vector of [2 x double] into two signed 32-bit integer values, /// returned in a 64-bit vector of [2 x i32]. 
#[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(cvtpd2pi))] pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 { cvtpd2pi(a) @@ -109,7 +109,7 @@ pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 { /// If the result of either conversion is inexact, the result is truncated /// (rounded towards zero) regardless of the current MXCSR setting. #[inline(always)] -#[target_feature(enable = "sse2")] +#[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(cvttpd2pi))] pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 { cvttpd2pi(a) @@ -139,7 +139,7 @@ mod tests { use x86::*; - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_add_si64() { let a = 1i64; let b = 2i64; @@ -148,7 +148,7 @@ mod tests { assert_eq!(mem::transmute::<__m64, i64>(r), expected); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_mul_su32() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(3, 4); @@ -157,7 +157,7 @@ mod tests { assert_eq_m64(r, mem::transmute(expected)); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_sub_si64() { let a = 1i64; let b = 2i64; @@ -166,7 +166,7 @@ mod tests { assert_eq!(mem::transmute::<__m64, i64>(r), expected); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_cvtpi32_pd() { let a = _mm_setr_pi32(1, 2); let expected = _mm_setr_pd(1., 2.); @@ -174,33 +174,33 @@ mod tests { assert_eq_m128d(r, expected); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_set_epi64() { let r = _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); assert_eq_m128i(r, _mm_setr_epi64x(2, 1)); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_set1_epi64() { let r = _mm_set1_epi64(mem::transmute(1i64)); assert_eq_m128i(r, _mm_setr_epi64x(1, 1)); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_setr_epi64() { let r = _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_movepi64_pi64() { let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0)); assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_movpi64_epi64() { let r = _mm_movpi64_epi64(_mm_setr_pi8( 5, @@ -215,14 +215,14 @@ mod tests { assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_cvtpd_pi32() { let a = _mm_setr_pd(5., 0.); let r = _mm_cvtpd_pi32(a); assert_eq_m64(r, _mm_setr_pi32(5, 0)); } - #[simd_test = "sse2"] + #[simd_test = "sse2,mmx"] unsafe fn test_mm_cvttpd_pi32() { use std::{f64, i32}; diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs index b5962f27ef5af..647074096cdf3 100644 --- a/coresimd/src/x86/i686/ssse3.rs +++ b/coresimd/src/x86/i686/ssse3.rs @@ -8,7 +8,7 @@ use x86::*; /// Compute the absolute value of packed 8-bit integers in `a` and /// return the unsigned results. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pabsb))] pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { pabsb(a) @@ -17,7 +17,7 @@ pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { /// Compute the absolute value of packed 8-bit integers in `a`, and return the /// unsigned results. 
#[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pabsw))] pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { pabsw(a) @@ -26,7 +26,7 @@ pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { /// Compute the absolute value of packed 32-bit integers in `a`, and return the /// unsigned results. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pabsd))] pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { pabsd(a) @@ -35,7 +35,7 @@ pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { /// Shuffle packed 8-bit integers in `a` according to shuffle control mask in /// the corresponding 8-bit element of `b`, and return the results #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pshufb))] pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 { pshufb(a, b) @@ -44,7 +44,7 @@ pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 { /// Concatenates the two 64-bit integer vector operands, and right-shifts /// the result by the number of bytes specified in the immediate operand. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(palignr, n = 15))] pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { macro_rules! call { @@ -58,7 +58,7 @@ pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { /// Horizontally add the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [4 x i16]. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phaddw))] pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { phaddw(a, b) @@ -67,7 +67,7 @@ pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { /// Horizontally add the adjacent pairs of values contained in 2 packed /// 64-bit vectors of [2 x i32]. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phaddd))] pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { phaddd(a, b) @@ -77,7 +77,7 @@ pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { /// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phaddsw))] pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 { phaddsw(a, b) @@ -86,7 +86,7 @@ pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 { /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [4 x i16]. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phsubw))] pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { phsubw(a, b) @@ -95,7 +95,7 @@ pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of [2 x i32]. 
#[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phsubd))] pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 { phsubd(a, b) @@ -106,7 +106,7 @@ pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 { /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are /// saturated to 8000h. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phsubsw))] pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 { phsubsw(a, b) @@ -118,7 +118,7 @@ pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 { /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pmaddubsw))] pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 { pmaddubsw(a, b) @@ -128,7 +128,7 @@ pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 { /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits [16:1] to the destination. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pmulhrsw))] pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { pmulhrsw(a, b) @@ -139,7 +139,7 @@ pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(psignb))] pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { psignb(a, b) @@ -150,7 +150,7 @@ pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(psignw))] pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { psignw(a, b) @@ -161,7 +161,7 @@ pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { /// Element in result are zeroed out when the corresponding element in `b` is /// zero. 
#[inline(always)] -#[target_feature(enable = "ssse3")] +#[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(psignd))] pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 { psignd(a, b) @@ -224,25 +224,25 @@ mod tests { use x86::*; - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_abs_pi8() { let r = _mm_abs_pi8(_mm_set1_pi8(-5)); assert_eq_m64(r, _mm_set1_pi8(5)); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_abs_pi16() { let r = _mm_abs_pi16(_mm_set1_pi16(-5)); assert_eq_m64(r, _mm_set1_pi16(5)); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_abs_pi32() { let r = _mm_abs_pi32(_mm_set1_pi32(-5)); assert_eq_m64(r, _mm_set1_pi32(5)); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_shuffle_pi8() { let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19); @@ -251,7 +251,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_alignr_pi8() { let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32); let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32); @@ -259,7 +259,7 @@ mod tests { assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64)); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_hadd_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(4, 128, 4, 3); @@ -268,7 +268,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_hadd_pi32() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(4, 128); @@ -277,7 +277,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_hadds_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(32767, 1, -32768, -1); @@ -286,7 +286,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_hsub_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(4, 128, 4, 3); @@ -295,7 +295,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_hsub_pi32() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(4, 128); @@ -304,7 +304,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_hsubs_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(4, 128, 4, 3); @@ -313,7 +313,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_maddubs_pi16() { let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19); @@ -322,7 +322,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_mulhrs_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(4, 32767, -1, -32768); @@ -331,7 +331,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_sign_pi8() { let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8); let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1); @@ -340,7 +340,7 @@ mod tests { assert_eq_m64(r, expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_sign_pi16() { let a = _mm_setr_pi16(-1, 2, 3, 4); let b = _mm_setr_pi16(1, -1, 1, 0); @@ -349,7 +349,7 @@ mod tests { assert_eq_m64(r, 
expected); } - #[simd_test = "ssse3"] + #[simd_test = "ssse3,mmx"] unsafe fn test_mm_sign_pi32() { let a = _mm_setr_pi32(-1, 2); let b = _mm_setr_pi32(1, 0); diff --git a/coresimd/src/x86/mod.rs b/coresimd/src/x86/mod.rs index 1c11a70191056..05e99a9b9c628 100644 --- a/coresimd/src/x86/mod.rs +++ b/coresimd/src/x86/mod.rs @@ -415,11 +415,7 @@ pub use self::i586::*; // // This module is not available for `i586` targets, // but available for all `i686` targets by default -#[cfg(any(all(target_arch = "x86", target_feature = "sse2"), - target_arch = "x86_64"))] mod i686; -#[cfg(any(all(target_arch = "x86", target_feature = "sse2"), - target_arch = "x86_64"))] pub use self::i686::*; #[cfg(target_arch = "x86_64")]
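
Note on the pattern above: every MMX-flavored intrinsic now lists `mmx` alongside its parent extension in `#[target_feature]`, and `#[inline(always)]` becomes plain `#[inline]`, since `#[inline(always)]` is not accepted together with `#[target_feature]`. The final hunk drops the `sse2`/`x86_64` cfg gate on the `i686` module, so the feature requirements are carried per-function by these attributes instead. A minimal caller-side sketch of the resulting contract follows; the `coresimd::x86` import path mirrors the module layout of the files touched here, and `max_lanes_i16` is a hypothetical helper, not part of this patch:

```rust
// Illustrative sketch only, assuming the crate layout in this tree.
use coresimd::x86::{__m64, _mm_max_pi16, _mm_setr_pi16};

/// A wrapper must enable the same `sse,mmx` pair that `_mm_max_pi16`
/// itself now declares; enabling only `sse` would no longer suffice.
#[inline]
#[target_feature(enable = "sse,mmx")]
unsafe fn max_lanes_i16(a: __m64, b: __m64) -> __m64 {
    _mm_max_pi16(a, b)
}

fn main() {
    // Compile-time check for brevity; real callers would typically use
    // runtime feature detection before entering an `sse,mmx` code path.
    if cfg!(all(target_feature = "sse", target_feature = "mmx")) {
        unsafe {
            let a = _mm_setr_pi16(-1, 6, -3, 8);
            let b = _mm_setr_pi16(5, -2, 7, -4);
            // Lane-wise maxima: (5, 6, 7, 8), matching test_mm_max_pi16.
            let _r = max_lanes_i16(a, b);
        }
    }
}
```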